From f3d42aa1ed1776c385f02eff963a40af842c195a Mon Sep 17 00:00:00 2001 From: Tatu Wikman Date: Sat, 11 Jan 2020 00:11:35 +0200 Subject: [PATCH 1/5] New module, url titles If activated in a room this will spit out the titles of urls mentioned in the room. --- Pipfile | 4 ++ README.md | 33 ++++++++---- modules/urltitles.py | 124 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 9 deletions(-) create mode 100644 modules/urltitles.py diff --git a/Pipfile b/Pipfile index 0f3d815..0d8aadc 100644 --- a/Pipfile +++ b/Pipfile @@ -14,12 +14,16 @@ google-auth-oauthlib = "*" requests = "*" igramscraper = "*" twitterscraper = "*" +httpx = "*" [dev-packages] pylint = "*" pycodestyle = "*" flake8 = "*" autopep8 = "*" +black = "*" +ipython = "*" +isort = "*" [requires] python_version = "3.7" diff --git a/README.md b/README.md index 6175a34..53525c3 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Hemppa - generic modular Matrix bot This bot is meant to be super easy platform to write Matrix bot functionality -in Python. It uses matrix-nio library https://github.com/poljar/matrix-nio/ for +in Python. It uses matrix-nio library https://github.com/poljar/matrix-nio/ for Matrix communications. Zero configuration except minimal Matrix account info is needed. Everything else can @@ -53,10 +53,10 @@ prefer it if possible. This bot polls the calendar every 5 minutes and notifies Howto: -* Create a calendar in Teamup https://teamup.com/ +* Create a calendar in Teamup https://teamup.com/ * Get api key at https://teamup.com/api-keys/request -* !teamup apikey [your api key] -* !teamup add [calendar id] +* !teamup apikey [your api key] +* !teamup add [calendar id] Commands: @@ -76,7 +76,7 @@ To set up, you'll need to generate oauth2 credentials.json file - see https://co Run the bot on *local* machine as OAuth2 wants to open localhost url in your browser. I haven't found out an easy way to do this on server. -There is a empty credentials.json file in the bot directory. Replace it with yours. When credentials.json is present, you must +There is a empty credentials.json file in the bot directory. Replace it with yours. When credentials.json is present, you must authenticate the bot to access calendar. There will be a link in console like this: ``` text @@ -137,24 +137,39 @@ Prefix with selected service, for example "!ig add accountname" or "!twitter lis * add [accountname] - Add account to this room (Must be done as room admin) * del [accountname] - Delete account from room (Must be done as room admin) -* list - List accounts in room +* list - List accounts in room * poll - Poll for new items (Must be done as bot owner) * clear - Clear all accounts from this room (Must be done as room admin) #### Instagram Polls instagram account(s). Uses instagram scraper library -without any authentication or api key. +without any authentication or api key. See: https://github.com/realsirjoe/instagram-scraper/ #### Twitter Polls twitter account(s). Uses twitter scraper library -without any authentication or api key. +without any authentication or api key. See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper +#### Url titles + +Watches all messages in a room and if a url is found tries to fetch it and +spit out the title if found. + +Commands: + +* !urltitles on - spam titles to room +* !urltitles off - stop spamming +* !urltitles status - show current status + +Example: + +* !urltitles status + ## Bot setup * Create a Matrix user @@ -202,7 +217,7 @@ You can set MATRIX_PASSWORD if you want to get access token. Normally you can us BOT_OWNERS is a comma-separated list of matrix id's for the owners of the bot. Some commands require sender to be bot owner. Typically set your own id into it. Don't include bot itself in BOT_OWNERS if cron -or any other module that can cause bot to send custom commands is used as it could potentially be used to run +or any other module that can cause bot to send custom commands is used as it could potentially be used to run owner commands as the bot itself. ## Module API diff --git a/modules/urltitles.py b/modules/urltitles.py new file mode 100644 index 0000000..7d11ef9 --- /dev/null +++ b/modules/urltitles.py @@ -0,0 +1,124 @@ +import re +import shlex +import httpx +from lxml.html.soupparser import fromstring +from nio import RoomMessageText +from functools import lru_cache + +class MatrixModule: + """ + Simple url fetch and spit out title module. + + Everytime a url is seen in a message we do http request to it and try to get a title tag contents to spit out to the room. + + TODO: on/off switch... + """ + + bot = None + onoff = dict() # room_id -> true or false + + def matrix_start(self, bot): + """ + Register callback for all RoomMessageText events on startup + """ + self.bot = bot + bot.client.add_event_callback(self.text_cb, RoomMessageText) + + async def text_cb(self, room, event): + """ + Handle client callbacks for all room text events + """ + # no content at all? + if len(event.body) < 1: + return + + # are we on in this room? + if self.onoff.get(room.room_id) is not True: + return + + # extract possible urls from message + urls = re.findall(r"(https?://\S+)", event.body) + + # no urls, nothing to do + if len(urls) == 0: + return + + # fetch the urls and if we can see a title spit it out + for url in urls: + title = self.get_title_from_url(url) + if title is not None: + await self.bot.send_html(room, f"Title: {title}", f"Title: {title}") + + @lru_cache(maxsize=128) + def get_title_from_url(self, url): + """ + Fetch url and try to get the title from the response, returns either the title or None + """ + try: + r = httpx.get(url) + except Exception as e: + # if it failed then it failed, no point in trying anything fancy + # this is just a title spitting bot :) + return None + + if r.status_code != 200: + return None + + # try parse and get the title + try: + elem = fromstring(r.text).find(".//head/title") + except Exception as e: + # again, no point in trying anything else + return None + + if elem is not None: + return elem.text + + # no bonus + return None + + async def matrix_message(self, bot, room, event): + """ + on off switch + """ + bot.must_be_admin(room, event) + + args = shlex.split(event.body) + args.pop(0) + + if len(args) == 1: + if args[0] == "on": + self.onoff[room.room_id] = True + bot.save_settings() + await bot.send_text( + room, "Ok, I will spam titles from urls I see on this room." + ) + return + if args[0] == "off": + self.onoff[room.room_id] = False + bot.save_settings() + await bot.send_text( + room, "Ok, not spamming titles in this room anymore." + ) + return + + if args[0] == "status": + if self.onoff.get(room.room_id) is not True: + await bot.send_text( + room, "Nope, I'm not spamming you with titles." + ) + else: + await bot.send_text( + room, "Yup, spamming you with titles from urls seen." + ) + return + + await bot.send_text( + room, + "Sorry, I did not understand. I only understand 'on', 'off' and 'status' commands", + ) + + return + + def help(self): + return "If I see a url in a message I will try to get the title from the page and spit it out" From a2f4d7c9991ef0d2f3f4f782db7708c139e126d0 Mon Sep 17 00:00:00 2001 From: Tatu Wikman Date: Sat, 11 Jan 2020 01:59:07 +0200 Subject: [PATCH 2/5] renamed to url Added different on off statuses * title, spams titles * description, spams descriptions * both, spams both * off, dont spam And a bit of refactoring --- README.md | 10 +-- modules/url.py | 156 +++++++++++++++++++++++++++++++++++++++++++ modules/urltitles.py | 124 ---------------------------------- 3 files changed, 162 insertions(+), 128 deletions(-) create mode 100644 modules/url.py delete mode 100644 modules/urltitles.py diff --git a/README.md b/README.md index 53525c3..0bf00fc 100644 --- a/README.md +++ b/README.md @@ -155,16 +155,18 @@ without any authentication or api key. See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper -#### Url titles +#### Url Watches all messages in a room and if a url is found tries to fetch it and spit out the title if found. Commands: -* !urltitles on - spam titles to room -* !urltitles off - stop spamming -* !urltitles status - show current status +* !url status - show current status +* !url title - spam titles to room +* !url description - spam descriptions +* !url both - spam both title and description +* !url off - stop spamming Example: diff --git a/modules/url.py b/modules/url.py new file mode 100644 index 0000000..8168ac7 --- /dev/null +++ b/modules/url.py @@ -0,0 +1,156 @@ +import re +import shlex +import httpx +from lxml.html.soupparser import fromstring +from nio import RoomMessageText +from functools import lru_cache + + +class MatrixModule: + """ + Simple url fetch and spit out title module. + + Everytime a url is seen in a message we do http request to it and try to get a title tag contents to spit out to the room. + + TODO: on/off switch... + """ + + bot = None + status = dict() # room_id -> what to do with urls + + STATUSES = { + "OFF": "Not spamming this channel", + "TITLE": "Spamming this channel with titles", + "DESCRIPTION": "Spamming this channel with descriptions", + "BOTH": "Spamming this channel with both title and description", + } + + def matrix_start(self, bot): + """ + Register callback for all RoomMessageText events on startup + """ + self.bot = bot + bot.client.add_event_callback(self.text_cb, RoomMessageText) + + async def text_cb(self, room, event): + """ + Handle client callbacks for all room text events + """ + # no content at all? + if len(event.body) < 1: + return + + # are we on in this room? + status = self.status.get(room.room_id, "OFF") + if status not in self.STATUSES: + return + if status == "OFF": + return + + # extract possible urls from message + urls = re.findall(r"(https?://\S+)", event.body) + + # no urls, nothing to do + if len(urls) == 0: + return + + # fetch the urls and if we can see a title spit it out + for url in urls: + try: + title, description = self.get_content_from_url(url) + except Exception: + # failed fetching, give up + continue + + msg = None + + if status == 'TITLE' and title is not None: + msg = f'Title: {title}' + elif status == 'DESCRIPTION' and description is not None: + msg = f'Description: {description}' + + elif status == 'BOTH' and title is not None and description is not None: + msg = f'Title: {title}\nDescription: {description}' + + elif status == 'BOTH' and title is not None: + msg = f'Title: {title}' + elif status == 'BOTH' and description is not None: + msg = f'Description: {description}' + + if msg is not None: + await self.bot.send_text(room, msg) + + @lru_cache(maxsize=128) + def get_content_from_url(self, url): + """ + Fetch url and try to get the title and description from the response + """ + title = None + description = None + + try: + r = httpx.get(url) + except Exception as e: + # if it failed then it failed, no point in trying anything fancy + # this is just a title spitting bot :) + return (title, description) + + if r.status_code != 200: + return (title, description) + + # try parse and get the title + try: + titleelem = fromstring(r.text).find(".//head/title") + descriptionelem = fromstring(r.text).find( + './/head/meta[@name="description"]' + ) + except Exception: + # again, no point in trying anything else + return (title, description) + + try: + if titleelem is not None: + title = titleelem.text + if descriptionelem is not None: + description = descriptionelem.attrib.get("content") + except Exception: + # if it fails it fails + pass + + return (title, description) + + async def matrix_message(self, bot, room, event): + """ + commands for setting what to do in this channel + """ + bot.must_be_admin(room, event) + + args = shlex.split(event.body) + args.pop(0) + + # save the new status + if len(args) == 1 and self.STATUSES.get(args[0].upper()) is not None: + self.status[room.room_id] = args[0].upper() + bot.save_settings() + await bot.send_text( + room, f"Ok, {self.STATUSES.get(self.status[room.room_id])}" + ) + return + + # show status + elif len(args) == 1 and args[0] == "status": + await bot.send_text( + room, self.STATUSES.get(self.status.get(room.room_id, "OFF")) + ) + return + + # invalid command + await bot.send_text( + room, + "Sorry, I did not understand. I only understand 'title', 'description', 'both' and 'status' commands", + ) + + return + + def help(self): + return "If I see a url in a message I will try to get the title from the page and spit it out" diff --git a/modules/urltitles.py b/modules/urltitles.py deleted file mode 100644 index 7d11ef9..0000000 --- a/modules/urltitles.py +++ /dev/null @@ -1,124 +0,0 @@ -import re -import shlex -import httpx -from lxml.html.soupparser import fromstring -from nio import RoomMessageText -from functools import lru_cache - -class MatrixModule: - """ - Simple url fetch and spit out title module. - - Everytime a url is seen in a message we do http request to it and try to get a title tag contents to spit out to the room. - - TODO: on/off switch... - """ - - bot = None - onoff = dict() # room_id -> true or false - - def matrix_start(self, bot): - """ - Register callback for all RoomMessageText events on startup - """ - self.bot = bot - bot.client.add_event_callback(self.text_cb, RoomMessageText) - - async def text_cb(self, room, event): - """ - Handle client callbacks for all room text events - """ - # no content at all? - if len(event.body) < 1: - return - - # are we on in this room? - if self.onoff.get(room.room_id) is not True: - return - - # extract possible urls from message - urls = re.findall(r"(https?://\S+)", event.body) - - # no urls, nothing to do - if len(urls) == 0: - return - - # fetch the urls and if we can see a title spit it out - for url in urls: - title = self.get_title_from_url(url) - if title is not None: - await self.bot.send_html(room, f"Title: {title}", f"Title: {title}") - - @lru_cache(maxsize=128) - def get_title_from_url(self, url): - """ - Fetch url and try to get the title from the response, returns either the title or None - """ - try: - r = httpx.get(url) - except Exception as e: - # if it failed then it failed, no point in trying anything fancy - # this is just a title spitting bot :) - return None - - if r.status_code != 200: - return None - - # try parse and get the title - try: - elem = fromstring(r.text).find(".//head/title") - except Exception as e: - # again, no point in trying anything else - return None - - if elem is not None: - return elem.text - - # no bonus - return None - - async def matrix_message(self, bot, room, event): - """ - on off switch - """ - bot.must_be_admin(room, event) - - args = shlex.split(event.body) - args.pop(0) - - if len(args) == 1: - if args[0] == "on": - self.onoff[room.room_id] = True - bot.save_settings() - await bot.send_text( - room, "Ok, I will spam titles from urls I see on this room." - ) - return - if args[0] == "off": - self.onoff[room.room_id] = False - bot.save_settings() - await bot.send_text( - room, "Ok, not spamming titles in this room anymore." - ) - return - - if args[0] == "status": - if self.onoff.get(room.room_id) is not True: - await bot.send_text( - room, "Nope, I'm not spamming you with titles." - ) - else: - await bot.send_text( - room, "Yup, spamming you with titles from urls seen." - ) - return - - await bot.send_text( - room, - "Sorry, I did not understand. I only understand 'on', 'off' and 'status' commands", - ) - - return - - def help(self): - return "If I see a url in a message I will try to get the title from the page and spit it out" From 9113aa19e53dbcd03d7307cc281dcebdfd901342 Mon Sep 17 00:00:00 2001 From: Tatu Wikman Date: Sat, 11 Jan 2020 02:01:41 +0200 Subject: [PATCH 3/5] blackened --- modules/url.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/modules/url.py b/modules/url.py index 8168ac7..1302cf0 100644 --- a/modules/url.py +++ b/modules/url.py @@ -1,9 +1,10 @@ import re import shlex +from functools import lru_cache + import httpx from lxml.html.soupparser import fromstring from nio import RoomMessageText -from functools import lru_cache class MatrixModule: @@ -64,18 +65,18 @@ class MatrixModule: msg = None - if status == 'TITLE' and title is not None: - msg = f'Title: {title}' - elif status == 'DESCRIPTION' and description is not None: - msg = f'Description: {description}' + if status == "TITLE" and title is not None: + msg = f"Title: {title}" + elif status == "DESCRIPTION" and description is not None: + msg = f"Description: {description}" - elif status == 'BOTH' and title is not None and description is not None: - msg = f'Title: {title}\nDescription: {description}' + elif status == "BOTH" and title is not None and description is not None: + msg = f"Title: {title}\nDescription: {description}" - elif status == 'BOTH' and title is not None: - msg = f'Title: {title}' - elif status == 'BOTH' and description is not None: - msg = f'Description: {description}' + elif status == "BOTH" and title is not None: + msg = f"Title: {title}" + elif status == "BOTH" and description is not None: + msg = f"Description: {description}" if msg is not None: await self.bot.send_text(room, msg) From ab9265bd4d7403aaae78e83a3738ed50c7d557cb Mon Sep 17 00:00:00 2001 From: Tatu Wikman Date: Mon, 20 Jan 2020 23:31:35 +0200 Subject: [PATCH 4/5] Fix readme fix a typo with the old module name and add information about the default setting for a room. --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0bf00fc..bccfc8f 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,9 @@ See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper #### Url Watches all messages in a room and if a url is found tries to fetch it and -spit out the title if found. +spit out the title if found. + +Defaults to off and needs to be activated on every room you want this. Commands: @@ -170,7 +172,7 @@ Commands: Example: -* !urltitles status +* !url status ## Bot setup From 5dffd2db509d6f99142b7473202df33f98477e15 Mon Sep 17 00:00:00 2001 From: Tatu Wikman Date: Mon, 20 Jan 2020 23:32:26 +0200 Subject: [PATCH 5/5] Remove old todo comment --- modules/url.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/url.py b/modules/url.py index 1302cf0..65e2b82 100644 --- a/modules/url.py +++ b/modules/url.py @@ -12,8 +12,6 @@ class MatrixModule: Simple url fetch and spit out title module. Everytime a url is seen in a message we do http request to it and try to get a title tag contents to spit out to the room. - - TODO: on/off switch... """ bot = None