New module, url titles

If activated in a room this will spit out the titles of urls mentioned in the room.
This commit is contained in:
Tatu Wikman 2020-01-11 00:11:35 +02:00
parent 0723dea55e
commit f3d42aa1ed
No known key found for this signature in database
GPG Key ID: BF214F789950B74E
3 changed files with 152 additions and 9 deletions

View File

@ -14,12 +14,16 @@ google-auth-oauthlib = "*"
requests = "*" requests = "*"
igramscraper = "*" igramscraper = "*"
twitterscraper = "*" twitterscraper = "*"
httpx = "*"
[dev-packages] [dev-packages]
pylint = "*" pylint = "*"
pycodestyle = "*" pycodestyle = "*"
flake8 = "*" flake8 = "*"
autopep8 = "*" autopep8 = "*"
black = "*"
ipython = "*"
isort = "*"
[requires] [requires]
python_version = "3.7" python_version = "3.7"

View File

@ -155,6 +155,21 @@ without any authentication or api key.
See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper
#### Url titles
Watches all messages in a room and, when a message contains a url, tries to
fetch it and post the page title to the room.
Commands:
* !urltitles on - spam titles to room
* !urltitles off - stop spamming
* !urltitles status - show current status
Example:
* !urltitles status
## Bot setup ## Bot setup
* Create a Matrix user * Create a Matrix user

124
modules/urltitles.py Normal file
View File

@ -0,0 +1,124 @@
import asyncio
import html
import re
import shlex
from functools import lru_cache

import httpx
from lxml.html.soupparser import fromstring
from nio import RoomMessageText
class MatrixModule:
    """
    Simple url fetch and spit out title module.

    Every time a url is seen in a message of a room where the module has been
    switched on, an http request is made to it and the contents of the
    <title> tag (if any) are posted back to the room.

    The per-room switch is controlled with the 'on', 'off' and 'status'
    commands handled by matrix_message().
    """

    bot = None
    # room_id -> True or False
    # NOTE(review): bot.save_settings() is called whenever this changes, but
    # this class exposes no settings accessors - confirm how the bot persists it.
    onoff = {}

    # Anything that starts like an http(s) url and runs to the next whitespace.
    _URL_RE = re.compile(r"https?://\S+")
    # Characters that usually belong to the surrounding sentence, not the url.
    _TRAILING_PUNCTUATION = ".,;:!?'\""

    def matrix_start(self, bot):
        """
        Register callback for all RoomMessageText events on startup
        """
        self.bot = bot
        bot.client.add_event_callback(self.text_cb, RoomMessageText)

    @classmethod
    def _find_urls(cls, body):
        """
        Return the distinct http(s) urls mentioned in body, in order of appearance.

        Trailing sentence punctuation and unbalanced closing parentheses are
        removed, so "(see https://example.com/page)." yields
        "https://example.com/page". A url whose parentheses are balanced, such
        as ".../wiki/Foo_(bar)", is kept intact.
        """
        urls = []
        for raw in cls._URL_RE.findall(body):
            url = raw.rstrip(cls._TRAILING_PUNCTUATION)
            # a ")" without a matching "(" inside the url closes the sentence
            while url.endswith(")") and url.count(")") > url.count("("):
                url = url[:-1].rstrip(cls._TRAILING_PUNCTUATION)
            # stripping may leave just the scheme ("https://"); skip those
            if cls._URL_RE.fullmatch(url) and url not in urls:
                urls.append(url)
        return urls

    async def text_cb(self, room, event):
        """
        Handle client callbacks for all room text events

        Does nothing unless the module is switched on for the room and the
        message body contains at least one url. Sends one "Title: ..." message
        per url for which a title could be fetched.
        """
        # NOTE(review): messages sent by the bot itself are not filtered out
        # here - confirm that the bot does not receive its own "Title:" lines.

        # no content at all?
        if not event.body:
            return

        # are we on in this room?
        if self.onoff.get(room.room_id) is not True:
            return

        # extract possible urls from message
        urls = self._find_urls(event.body)

        # no urls, nothing to do
        if not urls:
            return

        # The fetch is a blocking http call, so run it in the default executor
        # instead of stalling the event loop while the remote server answers.
        loop = asyncio.get_running_loop()
        for url in urls:
            title = await loop.run_in_executor(None, self.get_title_from_url, url)
            if title is not None:
                # The title is untrusted remote content, escape it for html.
                # NOTE(review): assumes send_html takes the html body first and
                # the plain text fallback second - confirm against the bot.
                await self.bot.send_html(
                    room, f"Title: {html.escape(title)}", f"Title: {title}"
                )

    @lru_cache(maxsize=128)
    def get_title_from_url(self, url):
        """
        Fetch url and try to get the title from the response, returns either the title or None

        The title is returned with runs of whitespace collapsed to single
        spaces; an empty title counts as no title. Results, including None for
        failed fetches, are remembered by the lru_cache decorator.
        """
        try:
            r = httpx.get(url)
        except Exception:
            # if it failed then it failed, no point in trying anything fancy
            # this is just a title spitting bot :)
            return None

        if r.status_code != 200:
            return None

        # try parse and get the title
        try:
            elem = fromstring(r.text).find(".//head/title")
        except Exception:
            # again, no point in trying anything else
            return None

        if elem is None or not elem.text:
            return None

        # titles often span several lines in the page source
        title = " ".join(elem.text.split())
        return title or None

    async def matrix_message(self, bot, room, event):
        """
        on off switch

        Understands exactly one argument after the command word: 'on', 'off'
        or 'status'. Anything else gets a short usage reply.
        """
        bot.must_be_admin(room, event)

        try:
            # drop the command word itself, keep only its arguments
            args = shlex.split(event.body)[1:]
        except ValueError:
            # unbalanced quotes etc. - treat as something we did not understand
            args = []

        command = args[0] if len(args) == 1 else None

        if command == "on":
            self.onoff[room.room_id] = True
            bot.save_settings()
            await bot.send_text(
                room, "Ok, I will spam titles from urls I see on this room."
            )
            return

        if command == "off":
            self.onoff[room.room_id] = False
            bot.save_settings()
            await bot.send_text(room, "Ok, not spamming titles in this room anymore.")
            return

        if command == "status":
            if self.onoff.get(room.room_id) is True:
                await bot.send_text(
                    room, "Yup, spamming you with titles from urls seen."
                )
            else:
                await bot.send_text(room, "Nope, I'm not spamming you with titles.")
            return

        await bot.send_text(
            room,
            "Sorry, I did not understand. I only understand 'on', 'off' and 'status' commands",
        )

    def help(self):
        """
        Return the one line description of this module
        """
        return "If I see a url in a message I will try to get the title from the page and spit it out"