2020-01-11 01:59:07 +02:00
|
|
|
import re
|
|
|
|
import shlex
|
2020-01-11 02:01:41 +02:00
|
|
|
from functools import lru_cache
|
|
|
|
|
2020-01-11 01:59:07 +02:00
|
|
|
import httpx
|
2020-01-29 11:43:23 +02:00
|
|
|
from bs4 import BeautifulSoup
|
2020-02-06 21:56:53 +02:00
|
|
|
from nio import RoomMessageText, AsyncClient
|
2020-01-11 01:59:07 +02:00
|
|
|
|
2020-02-02 23:08:15 +02:00
|
|
|
from modules.common.module import BotModule
|
2020-01-11 01:59:07 +02:00
|
|
|
|
2020-02-02 23:08:15 +02:00
|
|
|
|
|
|
|
class MatrixModule(BotModule):
    """
    Simple url fetch and spit out title module.

    Every time a url is seen in a message we do an http request to it and try
    to get the title tag contents (and the meta description) to spit out to
    the room.
    """

    def __init__(self, name):
        super().__init__(name)

        self.bot = None
        self.status = dict()  # room_id -> what to do with urls (a key of STATUSES)
        self.type = "m.notice"  # message type used for replies: notice or text

        # mode name -> human readable explanation shown to the user
        self.STATUSES = {
            "OFF": "Not spamming this channel",
            "TITLE": "Spamming this channel with titles",
            "DESCRIPTION": "Spamming this channel with descriptions",
            "BOTH": "Spamming this channel with both title and description",
        }

    def matrix_start(self, bot):
        """
        Register callback for all RoomMessageText events on startup
        """
        super().matrix_start(bot)
        self.bot = bot
        bot.client.add_event_callback(self.text_cb, RoomMessageText)

    def matrix_stop(self, bot):
        """
        Unregister the text callback that matrix_start registered
        """
        super().matrix_stop(bot)
        bot.remove_callback(self.text_cb)

    async def text_cb(self, room, event):
        """
        Handle client callbacks for all room text events

        Looks for http(s) urls in the message body and, depending on the mode
        configured for the room, replies with the title and/or description of
        each url.
        """
        # no content at all?
        if not event.body:
            return

        # are we on in this room?
        status = self.status.get(room.room_id, "OFF")
        if status == "OFF" or status not in self.STATUSES:
            return

        # extract possible urls from message
        urls = re.findall(r"(https?://\S+)", event.body)

        # no urls, nothing to do
        if not urls:
            return

        # fetch the urls and if we can see a title spit it out
        for url in urls:
            # NOTE(review): get_content_from_url does a synchronous httpx.get,
            # so this coroutine does not yield while the page is downloaded.
            try:
                title, description = self.get_content_from_url(url)
            except Exception as e:
                # failed fetching, give up on this url but leave a trace
                self.logger.error(f"Failed handling url {url}. Error: {e}")
                continue

            # collect whatever the room's mode asks for and we actually found
            parts = []
            if title is not None and status in ("TITLE", "BOTH"):
                parts.append(f"Title: {title}")
            if description is not None and status in ("DESCRIPTION", "BOTH"):
                parts.append(f"Description: {description}")

            if parts:
                await self.bot.send_text(room, "\n".join(parts), self.type)

    @lru_cache(maxsize=128)
    def get_content_from_url(self, url):
        """
        Fetch url and try to get the title and description from the response

        Returns a (title, description) tuple. Either element is None when it
        could not be determined (request failed, non-200 status, tag missing).
        The result is memoized by lru_cache, so a failed lookup is remembered
        as well until it is evicted from the cache.
        """
        title = None
        description = None

        try:
            r = httpx.get(url)
        except Exception as e:
            self.logger.error(f"Failed fetching url {url}. Error: {e}")
            return (title, description)

        if r.status_code != 200:
            self.logger.info(f"Failed fetching url {url}. Status code: {r.status_code}")
            return (title, description)

        # try parse and get the title and description
        try:
            soup = BeautifulSoup(r.text, "html.parser")

            # A page may have no <title>, or a <title> without a single text
            # child (.string is None then). Neither case may prevent the
            # description lookup below.
            title_tag = soup.title
            if title_tag is not None and title_tag.string is not None:
                # Issue 63 patch - Title should not contain newlines or tabs
                title = title_tag.string.strip().replace('\n', '').replace('\t', '')
                if not title:
                    # an empty title is as good as no title
                    title = None

            descr_tag = soup.find("meta", attrs={"name": "description"})
            if descr_tag:
                description = descr_tag.get("content", None)
        except Exception as e:
            self.logger.error(f"Failed parsing response from url {url}. Error: {e}")
            return (title, description)

        return (title, description)

    async def matrix_message(self, bot, room, event):
        """
        commands for setting what to do in this channel

        Accepts exactly one argument after the command word:
        a mode name from STATUSES (case-insensitive), "status", "notice" or
        "text". Anything else gets a "did not understand" reply.
        """
        # presumably raises when the sender is not a room admin - the check
        # lives on the bot object and is not visible in this module
        bot.must_be_admin(room, event)

        try:
            args = shlex.split(event.body)
        except ValueError:
            # unbalanced quotes in the message; fall back to a plain split so
            # the user gets the normal reply instead of an unhandled error
            args = event.body.split()
        # drop the command word itself
        args = args[1:]

        if len(args) == 1:
            command = args[0]

            # save the new status
            if command.upper() in self.STATUSES:
                self.status[room.room_id] = command.upper()
                bot.save_settings()
                await bot.send_text(
                    room, f"Ok, {self.STATUSES.get(self.status[room.room_id])}"
                )
                return

            # show status
            if command == "status":
                await bot.send_text(
                    room, self.STATUSES.get(self.status.get(room.room_id, "OFF"))
                )
                return

            # set type to notice
            if command == "notice":
                bot.must_be_owner(event)
                self.type = "m.notice"
                bot.save_settings()
                await bot.send_text(
                    room, "Sending titles as notices from now on."
                )
                return

            # set type to text
            if command == "text":
                bot.must_be_owner(event)
                self.type = "m.text"
                bot.save_settings()
                await bot.send_text(
                    room, "Sending titles as text from now on."
                )
                return

        # invalid command
        await bot.send_text(
            room,
            "Sorry, I did not understand. See README for command list.",
        )
        return

    def get_settings(self):
        """
        Return the settings to persist: the base module settings plus the
        per-room status and the reply message type
        """
        data = super().get_settings()
        data['status'] = self.status
        # set_settings restores "type", so it has to be stored here as well
        data['type'] = self.type
        return data

    def set_settings(self, data):
        """
        Restore the per-room status and the reply message type from data,
        keeping the current values for keys that are missing or empty
        """
        super().set_settings(data)
        if data.get("status"):
            self.status = data["status"]
        if data.get("type"):
            self.type = data["type"]

    def help(self):
        """
        Return the one line help text of this module
        """
        return "If I see a url in a message I will try to get the title from the page and spit it out"
|