From f3d42aa1ed1776c385f02eff963a40af842c195a Mon Sep 17 00:00:00 2001 From: Tatu Wikman Date: Sat, 11 Jan 2020 00:11:35 +0200 Subject: [PATCH] New module, url titles If activated in a room this will spit out the titles of urls mentioned in the room. --- Pipfile | 4 ++ README.md | 33 ++++++++---- modules/urltitles.py | 124 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 9 deletions(-) create mode 100644 modules/urltitles.py diff --git a/Pipfile b/Pipfile index 0f3d815..0d8aadc 100644 --- a/Pipfile +++ b/Pipfile @@ -14,12 +14,16 @@ google-auth-oauthlib = "*" requests = "*" igramscraper = "*" twitterscraper = "*" +httpx = "*" [dev-packages] pylint = "*" pycodestyle = "*" flake8 = "*" autopep8 = "*" +black = "*" +ipython = "*" +isort = "*" [requires] python_version = "3.7" diff --git a/README.md b/README.md index 6175a34..53525c3 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Hemppa - generic modular Matrix bot This bot is meant to be super easy platform to write Matrix bot functionality -in Python. It uses matrix-nio library https://github.com/poljar/matrix-nio/ for +in Python. It uses matrix-nio library https://github.com/poljar/matrix-nio/ for Matrix communications. Zero configuration except minimal Matrix account info is needed. Everything else can @@ -53,10 +53,10 @@ prefer it if possible. This bot polls the calendar every 5 minutes and notifies Howto: -* Create a calendar in Teamup https://teamup.com/ +* Create a calendar in Teamup https://teamup.com/ * Get api key at https://teamup.com/api-keys/request -* !teamup apikey [your api key] -* !teamup add [calendar id] +* !teamup apikey [your api key] +* !teamup add [calendar id] Commands: @@ -76,7 +76,7 @@ To set up, you'll need to generate oauth2 credentials.json file - see https://co Run the bot on *local* machine as OAuth2 wants to open localhost url in your browser. I haven't found out an easy way to do this on server. 
-There is a empty credentials.json file in the bot directory. Replace it with yours. When credentials.json is present, you must +There is a empty credentials.json file in the bot directory. Replace it with yours. When credentials.json is present, you must authenticate the bot to access calendar. There will be a link in console like this: ``` text @@ -137,24 +137,39 @@ Prefix with selected service, for example "!ig add accountname" or "!twitter lis * add [accountname] - Add account to this room (Must be done as room admin) * del [accountname] - Delete account from room (Must be done as room admin) -* list - List accounts in room +* list - List accounts in room * poll - Poll for new items (Must be done as bot owner) * clear - Clear all accounts from this room (Must be done as room admin) #### Instagram Polls instagram account(s). Uses instagram scraper library -without any authentication or api key. +without any authentication or api key. See: https://github.com/realsirjoe/instagram-scraper/ #### Twitter Polls twitter account(s). Uses twitter scraper library -without any authentication or api key. +without any authentication or api key. See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper +#### Url titles + +Watches all messages in a room and if a url is found tries to fetch it and +spit out the title if found. + +Commands: + +* !urltitles on - spam titles to room +* !urltitles off - stop spamming +* !urltitles status - show current status + +Example: + +* !urltitles status + ## Bot setup * Create a Matrix user @@ -202,7 +217,7 @@ You can set MATRIX_PASSWORD if you want to get access token. Normally you can us BOT_OWNERS is a comma-separated list of matrix id's for the owners of the bot. Some commands require sender to be bot owner. Typically set your own id into it. 
import asyncio
import html
import logging
import re
import shlex
from functools import lru_cache

# Third-party packages (httpx, lxml, nio) are imported inside the methods that
# use them, so this module can be imported -- and its command handling
# exercised -- without them being installed.

logger = logging.getLogger(__name__)


class MatrixModule:
    """
    Simple url fetch and spit out title module.

    When switched on for a room, every url seen in a text message of that
    room is fetched and the contents of its <title> tag, if any, are posted
    back to the room.

    The switch is controlled with the "on", "off" and "status" commands
    handled by matrix_message().
    """

    bot = None
    # room_id -> True / False. Defined on the class, so the mapping is shared
    # by every instance of this module.
    onoff = dict()

    # Compiled once instead of on every incoming message.
    _URL_RE = re.compile(r"(https?://\S+)")

    def matrix_start(self, bot):
        """
        Register callback for all RoomMessageText events on startup.

        :param bot: the bot object; must expose client.add_event_callback()
        """
        from nio import RoomMessageText

        self.bot = bot
        bot.client.add_event_callback(self.text_cb, RoomMessageText)

    async def text_cb(self, room, event):
        """
        Handle client callbacks for all room text events.

        Does nothing when the message is empty, when the module is not
        switched on for the room, or when the message contains no urls.
        Otherwise posts one "Title: ..." message per url whose title could be
        fetched.

        :param room: room the message was seen in (room.room_id is read)
        :param event: the text event (event.body is read)
        """
        body = event.body

        # no content at all?
        if not body:
            return

        # are we on in this room?
        if self.onoff.get(room.room_id) is not True:
            return

        # NOTE(review): messages posted by the bot itself are not filtered
        # here; a page title that contains a url may be fetched again when
        # the bot sees its own message -- confirm whether the bot core
        # already drops own events.
        loop = asyncio.get_running_loop()

        # fetch the urls and if we can see a title spit it out
        for url in self._URL_RE.findall(body):
            # get_title_from_url() does blocking network I/O; run it in the
            # default executor so the event loop keeps serving other events.
            title = await loop.run_in_executor(None, self.get_title_from_url, url)
            if title is None:
                continue
            # The title comes from an arbitrary web page, so escape it before
            # it is used as HTML.
            # NOTE(review): assumes send_html(room, html, plaintext) argument
            # order -- confirm against the bot implementation.
            await self.bot.send_html(
                room, f"Title: {html.escape(title)}", f"Title: {title}"
            )

    def get_title_from_url(self, url):
        """
        Fetch url and try to get the title from the response.

        Best effort only: any failure while fetching is logged at debug level
        and reported as "no title".

        :param url: the url to fetch
        :return: the title with whitespace collapsed, or None
        """
        try:
            return self._fetch_title(url)
        except Exception:
            # if it failed then it failed, no point in trying anything fancy
            # this is just a title spitting bot :)
            logger.debug("Could not fetch title for %s", url, exc_info=True)
            return None

    @staticmethod
    @lru_cache(maxsize=128)
    def _fetch_title(url):
        """
        Cached worker behind get_title_from_url().

        The cache is keyed on the url only (not on a module instance).
        Exceptions raised by the request propagate to the caller and are
        therefore not cached, so a transient network error is retried the
        next time the url is seen.

        :param url: the url to fetch
        :return: the title with whitespace collapsed, or None when the
                 response is not a 200, cannot be parsed, or has no
                 non-empty head/title element
        """
        import httpx
        from lxml.html.soupparser import fromstring

        r = httpx.get(url)

        if r.status_code != 200:
            return None

        # try parse and get the title
        try:
            elem = fromstring(r.text).find(".//head/title")
        except Exception:
            # again, no point in trying anything else
            return None

        if elem is None or elem.text is None:
            # no bonus
            return None

        # Collapse newlines / runs of spaces; an all-whitespace title counts
        # as no title.
        return " ".join(elem.text.split()) or None

    async def matrix_message(self, bot, room, event):
        """
        on off switch

        Understands exactly one argument after the command word: "on", "off"
        or "status". Anything else gets a short usage reply.

        :param bot: the bot object (must_be_admin, save_settings and
                    send_text are used)
        :param room: room the command was given in
        :param event: the command event (event.body is parsed)
        """
        bot.must_be_admin(room, event)

        try:
            # drop the command word itself
            args = shlex.split(event.body)[1:]
        except ValueError:
            # unbalanced quotes; treat as a command we do not understand
            args = []

        command = args[0] if len(args) == 1 else None

        if command == "on":
            self.onoff[room.room_id] = True
            bot.save_settings()
            await bot.send_text(
                room, "Ok, I will spam titles from urls I see on this room."
            )
            return

        if command == "off":
            self.onoff[room.room_id] = False
            bot.save_settings()
            await bot.send_text(
                room, "Ok, not spamming titles in this room anymore."
            )
            return

        if command == "status":
            if self.onoff.get(room.room_id) is not True:
                await bot.send_text(
                    room, "Nope, I'm not spamming you with titles."
                )
            else:
                await bot.send_text(
                    room, "Yup, spamming you with titles from urls seen."
                )
            return

        await bot.send_text(
            room,
            "Sorry, I did not understand. I only understand 'on', 'off' and 'status' commands",
        )

    def help(self):
        """Return the one-line help text for this module."""
        return "If I see a url in a message I will try to get the title from the page and spit it out"