diff --git a/Pipfile b/Pipfile index 0f3d815..0d8aadc 100644 --- a/Pipfile +++ b/Pipfile @@ -14,12 +14,16 @@ google-auth-oauthlib = "*" requests = "*" igramscraper = "*" twitterscraper = "*" +httpx = "*" [dev-packages] pylint = "*" pycodestyle = "*" flake8 = "*" autopep8 = "*" +black = "*" +ipython = "*" +isort = "*" [requires] python_version = "3.7" diff --git a/README.md b/README.md index 6175a34..bccfc8f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Hemppa - generic modular Matrix bot This bot is meant to be super easy platform to write Matrix bot functionality -in Python. It uses matrix-nio library https://github.com/poljar/matrix-nio/ for +in Python. It uses matrix-nio library https://github.com/poljar/matrix-nio/ for Matrix communications. Zero configuration except minimal Matrix account info is needed. Everything else can @@ -53,10 +53,10 @@ prefer it if possible. This bot polls the calendar every 5 minutes and notifies Howto: -* Create a calendar in Teamup https://teamup.com/ +* Create a calendar in Teamup https://teamup.com/ * Get api key at https://teamup.com/api-keys/request -* !teamup apikey [your api key] -* !teamup add [calendar id] +* !teamup apikey [your api key] +* !teamup add [calendar id] Commands: @@ -76,7 +76,7 @@ To set up, you'll need to generate oauth2 credentials.json file - see https://co Run the bot on *local* machine as OAuth2 wants to open localhost url in your browser. I haven't found out an easy way to do this on server. -There is a empty credentials.json file in the bot directory. Replace it with yours. When credentials.json is present, you must +There is a empty credentials.json file in the bot directory. Replace it with yours. When credentials.json is present, you must authenticate the bot to access calendar. 
There will be a link in console like this: ``` text @@ -137,24 +137,43 @@ Prefix with selected service, for example "!ig add accountname" or "!twitter lis * add [accountname] - Add account to this room (Must be done as room admin) * del [accountname] - Delete account from room (Must be done as room admin) -* list - List accounts in room +* list - List accounts in room * poll - Poll for new items (Must be done as bot owner) * clear - Clear all accounts from this room (Must be done as room admin) #### Instagram Polls instagram account(s). Uses instagram scraper library -without any authentication or api key. +without any authentication or api key. See: https://github.com/realsirjoe/instagram-scraper/ #### Twitter Polls twitter account(s). Uses twitter scraper library -without any authentication or api key. +without any authentication or api key. See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper +#### Url + +Watches all messages in a room and if a url is found tries to fetch it and +spit out the title if found. + +Defaults to off and needs to be activated on every room you want this. + +Commands: + +* !url status - show current status +* !url title - spam titles to room +* !url description - spam descriptions +* !url both - spam both title and description +* !url off - stop spamming + +Example: + +* !url status + ## Bot setup * Create a Matrix user @@ -202,7 +221,7 @@ You can set MATRIX_PASSWORD if you want to get access token. Normally you can us BOT_OWNERS is a comma-separated list of matrix id's for the owners of the bot. Some commands require sender to be bot owner. Typically set your own id into it. Don't include bot itself in BOT_OWNERS if cron -or any other module that can cause bot to send custom commands is used as it could potentially be used to run +or any other module that can cause bot to send custom commands is used as it could potentially be used to run owner commands as the bot itself. 
class MatrixModule:
    """
    Simple url fetch and spit out title module.

    Every time a url is seen in a message we do an http request to it and try
    to get the <title> tag and the description meta tag contents to spit out
    to the room.

    What gets posted is tracked per room in ``status`` (one of the keys of
    ``STATUSES``); rooms without an entry are treated as "OFF".
    """

    bot = None
    status = dict()  # room_id -> what to do with urls

    STATUSES = {
        "OFF": "Not spamming this channel",
        "TITLE": "Spamming this channel with titles",
        "DESCRIPTION": "Spamming this channel with descriptions",
        "BOTH": "Spamming this channel with both title and description",
    }

    # compiled once instead of on every incoming message
    _URL_RE = re.compile(r"(https?://\S+)")

    def matrix_start(self, bot):
        """
        Register callback for all RoomMessageText events on startup
        """
        self.bot = bot
        bot.client.add_event_callback(self.text_cb, RoomMessageText)

    async def text_cb(self, room, event):
        """
        Handle client callbacks for all room text events

        Looks for http(s) urls in the message body and, depending on the
        status configured for the room, posts the title and/or description
        of every page that could be fetched.
        """
        # only needed here, so imported locally to keep the block standalone
        import asyncio

        # no content at all? (also covers a missing body)
        if not event.body:
            return

        # are we on in this room?
        status = self.status.get(room.room_id, "OFF")
        if status == "OFF" or status not in self.STATUSES:
            return

        # extract possible urls from message; dict.fromkeys drops duplicates
        # while keeping first-seen order so a repeated link is announced once
        urls = list(dict.fromkeys(self._URL_RE.findall(event.body)))

        loop = asyncio.get_event_loop()

        # fetch the urls and if we can see a title spit it out
        for url in urls:
            try:
                # the fetch is a blocking http call: run it in the default
                # executor so the event loop keeps serving other events
                title, description = await loop.run_in_executor(
                    None, self.get_content_from_url, url
                )
            except Exception:
                # failed fetching, give up on this url only
                continue

            msg = self._build_message(status, title, description)
            if msg is not None:
                await self.bot.send_text(room, msg)

    @staticmethod
    def _build_message(status, title, description):
        """
        Build the text to post for one url, or None when there is nothing to say

        "TITLE" and "DESCRIPTION" use only their own field, "BOTH" uses
        whichever of the two fields is available.
        """
        parts = []
        if status in ("TITLE", "BOTH") and title is not None:
            parts.append(f"Title: {title}")
        if status in ("DESCRIPTION", "BOTH") and description is not None:
            parts.append(f"Description: {description}")
        return "\n".join(parts) if parts else None

    @staticmethod
    def _clean_text(text):
        """
        Strip surrounding whitespace; empty or missing text becomes None
        """
        if not text:
            return None
        return text.strip() or None

    def get_content_from_url(self, url):
        """
        Fetch url and try to get the title and description from the response

        Returns a (title, description) tuple; either item is None when it
        could not be determined. Never raises for fetch or parse problems.
        Results are memoised per url (up to 128 entries).
        """
        return self._fetch_content(url)

    @staticmethod
    @lru_cache(maxsize=128)
    def _fetch_content(url):
        """
        Cached worker behind get_content_from_url

        Kept static so the cache is keyed on the url alone and does not hold
        a reference to the module instance.
        """
        nothing = (None, None)

        try:
            r = httpx.get(url)
        except Exception:
            # if it failed then it failed, no point in trying anything fancy
            # this is just a title spitting bot :)
            return nothing

        if r.status_code != 200:
            return nothing

        # parse the document once and look up both elements in it
        try:
            doc = fromstring(r.text)
            titleelem = doc.find(".//head/title")
            descriptionelem = doc.find('.//head/meta[@name="description"]')
        except Exception:
            # again, no point in trying anything else
            return nothing

        title = None
        description = None
        try:
            if titleelem is not None:
                title = MatrixModule._clean_text(titleelem.text)
            if descriptionelem is not None:
                description = MatrixModule._clean_text(
                    descriptionelem.attrib.get("content")
                )
        except Exception:
            # if it fails it fails, keep whatever we managed to extract
            pass

        return (title, description)

    @staticmethod
    def _parse_args(body):
        """
        Split a command message into its arguments, without the command word

        Falls back to plain whitespace splitting when the body has
        unbalanced quotes, which shlex refuses to parse.
        """
        try:
            words = shlex.split(body)
        except ValueError:
            words = body.split()
        return words[1:]

    async def matrix_message(self, bot, room, event):
        """
        commands for setting what to do in this channel

        Understands one argument: 'off', 'title', 'description' or 'both' to
        change the room status, or 'status' to show it (case-insensitive).
        Anything else gets a short usage reply.
        """
        # NOTE(review): presumably aborts the command when the sender is not
        # a room admin - confirm against the bot implementation
        bot.must_be_admin(room, event)

        args = self._parse_args(event.body)

        if len(args) == 1:
            command = args[0].upper()

            # save the new status
            if command in self.STATUSES:
                self.status[room.room_id] = command
                # NOTE(review): no settings hooks are visible in this class;
                # verify that save_settings() really persists `status`
                bot.save_settings()
                await bot.send_text(room, f"Ok, {self.STATUSES[command]}")
                return

            # show status
            if command == "STATUS":
                current = self.status.get(room.room_id, "OFF")
                await bot.send_text(room, self.STATUSES.get(current))
                return

        # invalid command
        await bot.send_text(
            room,
            "Sorry, I did not understand. I only understand 'title', 'description', 'both', 'off' and 'status' commands",
        )

        return

    def help(self):
        """
        One-line description of this module
        """
        return "If I see a url in a message I will try to get the title from the page and spit it out"