2023-03-04 15:38:38 +02:00
|
|
|
import re
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
from modules.common.module import BotModule
|
|
|
|
|
2023-03-08 14:57:26 +02:00
|
|
|
|
2023-03-04 15:38:38 +02:00
|
|
|
# This module looks up a Wikipedia page by title and returns its summary and a link.
|
|
|
|
class MatrixModule(BotModule):
    """Bot module that answers ``!wikipedia <query>`` with a summary and a link.

    The text after the command word is sent to the MediaWiki API at
    ``self.api_url`` as a page title (redirects are followed).  The intro
    extract of the first page returned is cleaned up, shortened to
    ``SUMMARY_LIMIT`` characters and posted to the room together with the
    page URL.
    """

    # Maximum length of the summary posted to the room (before the '...').
    SUMMARY_LIMIT = 256
    # Seconds to wait for the Wikipedia API before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self, name):
        """Initialise the module and set the MediaWiki API endpoint."""
        super().__init__(name)
        self.api_url = 'https://en.wikipedia.org/w/api.php'

    async def matrix_message(self, bot, room, event):
        """Handle a room message of the form ``<command> <query>``.

        Sends exactly one text message to ``room`` via ``bot.send_text``:
        the usage hint when no query was given, ``'No results found'`` when
        the API returns no usable page, the text of the exception when the
        lookup fails, or ``'<title>: <summary> \\n<url>'`` on success.
        """
        # Split off the command word only; the rest of the body is the query.
        parts = event.body.split(maxsplit=1)
        if len(parts) < 2:
            await bot.send_text(room, 'Usage: !wikipedia <query>')
            return
        query = parts[1].strip()

        try:
            page = self._fetch_page(query)
        except Exception as exc:
            # Best-effort boundary: report the failure to the room instead of
            # letting it propagate out of the message handler.
            await bot.send_text(room, str(exc))
            return

        if page is None:
            await bot.send_text(room, 'No results found')
            return

        title = page.get('title', query)
        # Truncate the extract, Element URL preview contains nonsense
        # Wikipedia meta content.
        extract = self._truncate(self._clean_extract(page.get('extract', '')))
        url = self._page_url(title)

        await bot.send_text(room, f'{title}: {extract} \n{url}')

    def help(self):
        """Return the one-line description of this module."""
        return 'Wikipedia bot'

    def _fetch_page(self, query):
        """Query the API for ``query`` and return the first page dict, or None.

        Returns None when the reply contains no pages or when the first page
        has a negative id (the original code treated id ``'-1'`` as
        "not found").  Raises whatever ``requests`` raises on network, HTTP
        status or JSON decoding errors.
        """
        # requests.get is a blocking call with no default timeout; without
        # one a stalled connection would hang this handler indefinitely.
        api_response = requests.get(
            self.api_url,
            params={
                'action': 'query',
                'format': 'json',
                'exintro': True,
                'explaintext': True,
                'prop': 'extracts',
                'redirects': 1,
                'titles': query,
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        api_response.raise_for_status()
        data = api_response.json()

        # 'pages' is a mapping of page id -> page data; only the first entry
        # is used, as before.
        pages = data.get('query', {}).get('pages') or {}
        page_id, page = next(iter(pages.items()), (None, None))
        if page is None or str(page_id).startswith('-'):
            return None
        return page

    @staticmethod
    def _clean_extract(text):
        """Strip HTML tags from ``text`` and collapse whitespace to single spaces.

        Newlines and tabs are replaced by a space rather than deleted so that
        adjacent paragraphs or words are not glued together.
        """
        without_tags = re.sub(r'<[^<]+?>', '', text)
        return re.sub(r'\s+', ' ', without_tags).strip()

    @classmethod
    def _truncate(cls, text, limit=None):
        """Shorten ``text`` to at most ``limit`` characters plus ``'...'``.

        ``limit`` defaults to ``SUMMARY_LIMIT``.  Text that already fits is
        returned unchanged.  Otherwise the cut is made at the last space
        within the first ``limit + 1`` characters so no word is split; if
        there is no such space the text is cut hard at ``limit``.
        """
        if limit is None:
            limit = cls.SUMMARY_LIMIT
        if len(text) <= limit:
            return text
        head = text[:limit + 1]
        cut = head.rfind(' ')
        head = head[:cut] if cut > 0 else text[:limit]
        return head.rstrip() + '...'

    @staticmethod
    def _page_url(title):
        """Return the en.wikipedia.org article URL for ``title``.

        Whitespace becomes underscores and characters that are not valid in
        a URL path (``?``, ``%``, ``&``, non-ASCII, ...) are percent-encoded
        so the link stays clickable.
        """
        # Local import: keeps the block self-contained without touching the
        # file's import section.
        from urllib.parse import quote

        slug = re.sub(r'\s', '_', title)
        return 'https://en.wikipedia.org/wiki/' + quote(slug, safe='/:(),')
|