Merge pull request #15 from tswfi/url_title_spam

New url module
This commit is contained in:
Ville Ranki 2020-01-20 23:34:39 +02:00 committed by GitHub
commit c6adf105f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 187 additions and 9 deletions

View File

@ -14,12 +14,16 @@ google-auth-oauthlib = "*"
requests = "*" requests = "*"
igramscraper = "*" igramscraper = "*"
twitterscraper = "*" twitterscraper = "*"
httpx = "*"
[dev-packages] [dev-packages]
pylint = "*" pylint = "*"
pycodestyle = "*" pycodestyle = "*"
flake8 = "*" flake8 = "*"
autopep8 = "*" autopep8 = "*"
black = "*"
ipython = "*"
isort = "*"
[requires] [requires]
python_version = "3.7" python_version = "3.7"

View File

@ -155,6 +155,25 @@ without any authentication or api key.
See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper See: https://github.com/taspinar/twitterscraper/tree/master/twitterscraper
#### Url
Watches all messages in a room and, when a message contains a URL, tries to
fetch it and post the page title (and/or description) to the room.
Defaults to off and must be enabled separately in every room where you want it.
Commands:
* !url status - show current status
* !url title - spam titles to room
* !url description - spam descriptions
* !url both - spam both title and description
* !url off - stop spamming
Example:
* !url status
## Bot setup ## Bot setup
* Create a Matrix user * Create a Matrix user

155
modules/url.py Normal file
View File

@ -0,0 +1,155 @@
import asyncio
import re
import shlex
from functools import lru_cache

import httpx
from lxml.html.soupparser import fromstring
from nio import RoomMessageText
class MatrixModule:
    """
    Simple url fetch and spit out title module.

    Every time a url is seen in a message we do an http request to it and try
    to get the title tag contents (and the meta description) to spit out to
    the room.
    """

    bot = None
    # room_id -> what to do with urls (one of the STATUSES keys).
    # NOTE: this is a class-level dict, so it is shared by every instance.
    status = dict()
    STATUSES = {
        "OFF": "Not spamming this channel",
        "TITLE": "Spamming this channel with titles",
        "DESCRIPTION": "Spamming this channel with descriptions",
        "BOTH": "Spamming this channel with both title and description",
    }

    def matrix_start(self, bot):
        """
        Register callback for all RoomMessageText events on startup
        """
        self.bot = bot
        bot.client.add_event_callback(self.text_cb, RoomMessageText)

    async def text_cb(self, room, event):
        """
        Handle client callbacks for all room text events

        Looks up the mode configured for ``room.room_id``; unless it is OFF,
        every http(s) url found in ``event.body`` is fetched and the title
        and/or description is sent back to the room with ``self.bot.send_text``.
        """
        # no content at all?
        if not event.body:
            return

        # are we on in this room?
        status = self.status.get(room.room_id, "OFF")
        if status not in self.STATUSES or status == "OFF":
            return

        # extract possible urls from message
        urls = re.findall(r"(https?://\S+)", event.body)

        # no urls, nothing to do
        if not urls:
            return

        loop = asyncio.get_running_loop()

        # fetch the urls and if we can see a title spit it out
        for url in urls:
            try:
                # get_content_from_url does blocking network I/O; run it in the
                # default executor so this callback does not stall the event
                # loop while the remote server answers.
                title, description = await loop.run_in_executor(
                    None, self.get_content_from_url, url
                )
            except Exception:
                # failed fetching, give up
                continue

            parts = []
            if status in ("TITLE", "BOTH") and title is not None:
                parts.append(f"Title: {title}")
            if status in ("DESCRIPTION", "BOTH") and description is not None:
                parts.append(f"Description: {description}")

            if parts:
                await self.bot.send_text(room, "\n".join(parts))

    # NOTE: lru_cache on a method keys on (self, url) and keeps the instance
    # referenced for as long as the entry stays cached. Failed fetches are
    # cached as (None, None) too, exactly like successful ones.
    @lru_cache(maxsize=128)
    def get_content_from_url(self, url):
        """
        Fetch url and try to get the title and description from the response

        Returns a ``(title, description)`` tuple. Either element is None when
        the request fails, the status code is not 200, the body cannot be
        parsed, or the corresponding element is missing. This call blocks
        until the http request has finished.
        """
        title = None
        description = None
        try:
            r = httpx.get(url)
        except Exception:
            # if it failed then it failed, no point in trying anything fancy
            # this is just a title spitting bot :)
            return (title, description)

        if r.status_code != 200:
            return (title, description)

        # try parse and get the title; parse the document only once and run
        # both lookups against the same tree
        try:
            document = fromstring(r.text)
            titleelem = document.find(".//head/title")
            descriptionelem = document.find('.//head/meta[@name="description"]')
        except Exception:
            # again, no point in trying anything else
            return (title, description)

        try:
            if titleelem is not None:
                title = titleelem.text
            if descriptionelem is not None:
                description = descriptionelem.attrib.get("content")
        except Exception:
            # if it fails it fails
            pass

        return (title, description)

    async def matrix_message(self, bot, room, event):
        """
        commands for setting what to do in this channel

        Understands exactly one argument after the command word: one of the
        STATUSES keys (case-insensitive) to change the mode for this room, or
        ``status`` (case-insensitive) to report the current mode. Anything
        else gets an explanatory reply.
        """
        # presumably aborts (raises) for non-admin senders - verify against bot
        bot.must_be_admin(room, event)

        try:
            args = shlex.split(event.body)
        except ValueError:
            # unbalanced quotes in the message; fall back to a plain split so
            # the user still gets an answer instead of an unhandled exception
            args = event.body.split()
        # drop the command word itself
        args = args[1:]

        if len(args) == 1:
            command = args[0].upper()

            # save the new status
            if command in self.STATUSES:
                self.status[room.room_id] = command
                bot.save_settings()
                await bot.send_text(
                    room, f"Ok, {self.STATUSES.get(self.status[room.room_id])}"
                )
                return

            # show status
            if command == "STATUS":
                await bot.send_text(
                    room, self.STATUSES.get(self.status.get(room.room_id, "OFF"))
                )
                return

        # invalid command
        await bot.send_text(
            room,
            "Sorry, I did not understand. I only understand 'title', 'description', 'both', 'off' and 'status' commands",
        )
        return

    def help(self):
        """
        Return the one-line help text for this module
        """
        return "If I see a url in a message I will try to get the title from the page and spit it out"