hemppa/modules/wikipedia.py

import re
from urllib.parse import quote

import requests

from modules.common.module import BotModule


# This module looks up a Wikipedia page by title and replies with its summary and a link.
class MatrixModule(BotModule):
    """Bot command that replies with the intro of an English Wikipedia page.

    The text following the command word is sent as the ``titles`` parameter
    of a MediaWiki ``action=query`` request (redirects are followed). The
    reply contains the page title, a shortened plain-text extract and a link
    to the page.
    """

    # Longest extract (in characters) sent before '...' is appended.
    MAX_EXTRACT_LENGTH = 256
    # Seconds to wait for the API; without a timeout a stalled connection
    # would hang the handler forever.
    REQUEST_TIMEOUT = 10
    # Characters that stay unescaped when a title is turned into a URL path.
    URL_SAFE_CHARS = '/:@!$*(),;~'

    def __init__(self, name):
        super().__init__(name)
        self.api_url = 'https://en.wikipedia.org/w/api.php'

    async def matrix_message(self, bot, room, event):
        """Answer a ``!wikipedia <query>`` message with a page summary.

        Args:
            bot: Object providing the ``send_text(room, text)`` coroutine.
            room: Room the reply is sent to.
            event: Message event; only ``event.body`` is read.
        """
        # Split off the command word only; the remainder is the page title.
        parts = event.body.split(maxsplit=1)
        if len(parts) < 2:
            await bot.send_text(room, 'Usage: !wikipedia <query>')
            return
        query = parts[1].strip()

        try:
            # NOTE(review): requests.get blocks the event loop for the
            # duration of the HTTP call; the timeout only bounds the stall.
            page = self._fetch_page(query)
            if page is None:
                reply = 'No results found'
            else:
                title = page['title']
                extract = self._truncate(
                    self._clean_extract(page.get('extract', '')))
                reply = f'{title}: {extract} \n{self._page_url(title)}'
        except Exception as exc:
            # Boundary handler: report network/parse failures to the room
            # instead of letting them escape into the caller.
            reply = str(exc)

        await bot.send_text(room, reply)

    def _fetch_page(self, query):
        """Return the API page record for ``query`` or None if none exists."""
        response = requests.get(self.api_url, params={
            'action': 'query',
            'format': 'json',
            'exintro': True,
            'explaintext': True,
            'prop': 'extracts',
            'redirects': 1,
            'titles': query,
        }, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

        pages = response.json().get('query', {}).get('pages', {})
        for page_id, page in pages.items():
            # The API reports non-existent pages under negative ids
            # (e.g. '-1') and marks them with a 'missing' key.
            if page_id.startswith('-') or 'missing' in page:
                continue
            return page
        return None

    @staticmethod
    def _clean_extract(text):
        """Strip HTML tags and collapse all whitespace runs to one space."""
        without_tags = re.sub(r'<[^<]+?>', '', text)
        # Newlines and tabs become single spaces so words on either side of
        # a paragraph break are not glued together.
        return re.sub(r'\s+', ' ', without_tags).strip()

    @classmethod
    def _truncate(cls, text):
        """Shorten ``text`` to MAX_EXTRACT_LENGTH characters at a word boundary."""
        limit = cls.MAX_EXTRACT_LENGTH
        if len(text) <= limit:
            return text

        # Look one character past the limit so a word ending exactly at the
        # limit is kept whole.
        head = text[:limit + 1]
        cut = head.rfind(' ')
        # Without any space to break on, fall back to a hard cut rather than
        # returning an empty extract.
        shortened = head[:cut] if cut > 0 else head[:limit]
        return shortened.rstrip() + '...'

    @classmethod
    def _page_url(cls, title):
        """Build the article URL for ``title`` with underscores and escaping."""
        path = re.sub(r'\s', '_', title)
        # Percent-encode so characters such as '?', '#' and '%' in a title
        # are not interpreted as URL syntax.
        return 'https://en.wikipedia.org/wiki/' + quote(
            path, safe=cls.URL_SAFE_CHARS)

    def help(self):
        """Return the one-line description of this module."""
        return 'Wikipedia bot'
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`import re`

			`import requests`

			`from modules.common.module import BotModule`

wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`# This module searches wikipedia for query, returns page summary and link.`
			`class MatrixModule(BotModule):`
			`def __init__(self, name):`
			`super().__init__(name)`
			`self.api_url = 'https://en.wikipedia.org/w/api.php'`

			`async def matrix_message(self, bot, room, event):`
			`args = event.body.split()`

Wikipedia module initial commit 2023-03-04 15:38:53 +02:00			`if len(args) > 1:`
wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`query = event.body[len(args[0]) + 1:]`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`try:`
			`response = requests.get(self.api_url, params={`
			`'action': 'query',`
wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`'format': 'json',`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`'exintro': True,`
			`'explaintext': True,`
wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`'prop': 'extracts',`
			`'redirects': 1,`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`'titles': query,`
			`})`

			`response.raise_for_status()`
			`data = response.json()`

wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`# Get the page id`
			`page_id = list(data['query']['pages'].keys())[0]`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00
wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`if page_id == '-1':`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`await bot.send_text(room, 'No results found')`
			`return`

wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`# Get the page title`
			`title = data['query']['pages'][page_id]['title']`

			`# Get the page summary`
			`summary = data['query']['pages'][page_id]['extract']`

Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`# Remove all html tags`
wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`extract = re.sub('<[^<]+?>', '', summary)`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`# Remove any multiple spaces`
			`extract = re.sub(' +', ' ', extract)`
			`# Remove any new lines`
			`extract = re.sub('', '', extract)`
			`# Remove any tabs`
			`extract = re.sub('\t', '', extract)`

wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`# Truncate the extract, Element URL preview contains nonsense Wikipedia meta content`
			`if len(extract) <= 256:`
			`pass`
			`else:`
			`extract = ' '.join(extract[:256 + 1].split(' ')[0:-1]) + '...'`

			`# Get the page url`
			`url = f'https://en.wikipedia.org/wiki/{title}'`

			`# Convert all spaces to underscores in url`
			`url = re.sub(r'\s', '_', url)`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00
wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`# Format the response`
			`response = f'{title}: {extract} \n{url}'`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00
wikipedia.py: add smart trimming for extract, use redirects, use canonical URL in content, convert spaces to underscore in urls. 2023-03-08 14:57:26 +02:00			`# Send the response`
			`await bot.send_text(room, response)`
Wikipedia module initial commit 2023-03-04 15:38:38 +02:00			`return`
			`except Exception as exc:`
			`await bot.send_text(room, str(exc))`
			`else:`
			`await bot.send_text(room, 'Usage: !wikipedia <query>')`

			`def help(self):`
Wikipedia module initial commit 2023-03-04 15:38:53 +02:00			`return ('Wikipedia bot')`