Url: support blacklisting certain urls (to work around youtube suckiness)
This commit is contained in:
parent
2bc62c7a37
commit
35593de3b9
|
@ -260,6 +260,7 @@ Defaults to off and needs to be activated on every room you want this.
|
|||
|
||||
You can choose to send titles as notices (as in Matrix spec) or normal
|
||||
messages (IRC users might prefer this). This is a global setting currently.
|
||||
You can set a blacklist to ignore URLs that contain any of the blacklisted substrings. The blacklist is also a global setting currently.
|
||||
|
||||
Commands:
|
||||
|
||||
|
@ -270,10 +271,13 @@ Commands:
|
|||
* !url off - stop spamming
|
||||
* !url text - send titles as normal text (must be owner)
|
||||
* !url notice - sends titles as notices (must be owner)
|
||||
* !url blacklist list - set the blacklist to a comma-separated list of URL substrings (must be owner)
|
||||
* !url blacklist clear - clear the blacklist (must be owner)
|
||||
|
||||
Example:
|
||||
|
||||
* !url status
|
||||
* !url blacklist www.youtube.com,www.somethingelse.com
|
||||
|
||||
NOTE: The module is disabled by default, i.e. you need to enable the module itself before activating it in a room
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ class MatrixModule(BotModule):
|
|||
"DESCRIPTION": "Spamming this channel with descriptions",
|
||||
"BOTH": "Spamming this channel with both title and description",
|
||||
}
|
||||
|
||||
self.blacklist = [ ]
|
||||
self.enabled = False
|
||||
|
||||
def matrix_start(self, bot):
|
||||
|
@ -45,9 +45,6 @@ class MatrixModule(BotModule):
|
|||
bot.client.add_event_callback(self.text_cb, RoomMessageText)
|
||||
# extend the useragent string to contain version and bot name
|
||||
self.useragent = f"Mozilla/5.0 (compatible; Hemppa/{self.bot.version}; {self.bot.client.user}; +https://github.com/vranki/hemppa/)"
|
||||
# Actually no - for example Youtube doesn't serve titles for proper Hemppa user agent!
|
||||
# Lie and say we are generic Firefox. Blame Youtube..
|
||||
self.useragent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
|
||||
self.logger.debug(f"useragent: {self.useragent}")
|
||||
|
||||
|
||||
|
@ -83,6 +80,7 @@ class MatrixModule(BotModule):
|
|||
if status == "OFF":
|
||||
return
|
||||
|
||||
try:
|
||||
# extract possible urls from message
|
||||
urls = re.findall(r"(https?://\S+)", event.body)
|
||||
|
||||
|
@ -99,6 +97,14 @@ class MatrixModule(BotModule):
|
|||
self.logger.debug(f"Skipping matrix.to url (#98): {url}")
|
||||
continue
|
||||
|
||||
url_blacklisted = False
|
||||
for blacklisted in self.blacklist:
|
||||
if blacklisted in url:
|
||||
url_blacklisted = True
|
||||
if url_blacklisted:
|
||||
self.logger.debug(f"Skipping blacklisted url {url}")
|
||||
continue
|
||||
|
||||
try:
|
||||
title, description = self.get_content_from_url(url)
|
||||
except Exception as e:
|
||||
|
@ -124,6 +130,9 @@ class MatrixModule(BotModule):
|
|||
|
||||
if msg is not None:
|
||||
await self.bot.send_text(room, msg, msgtype=self.type, bot_ignore=True)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Unexpected error in url module text_cb: {e}")
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
|
||||
@lru_cache(maxsize=128)
|
||||
def get_content_from_url(self, url):
|
||||
|
@ -178,7 +187,7 @@ class MatrixModule(BotModule):
|
|||
title_tag = soup.find("meta", attrs={"name": "title"})
|
||||
ogtitle = soup.find("meta", property="og:title")
|
||||
if title_tag:
|
||||
title = descr_tag.get("content", None)
|
||||
title = title_tag.get("content", None)
|
||||
elif ogtitle:
|
||||
title = ogtitle["content"]
|
||||
elif soup.head and soup.head.title:
|
||||
|
@ -217,8 +226,9 @@ class MatrixModule(BotModule):
|
|||
|
||||
# show status
|
||||
elif len(args) == 1 and args[0] == "status":
|
||||
status = self.STATUSES.get(self.status.get(room.room_id, "OFF")) + f', URL blacklist: {self.blacklist}'
|
||||
await bot.send_text(
|
||||
room, self.STATUSES.get(self.status.get(room.room_id, "OFF"))
|
||||
room, status
|
||||
)
|
||||
return
|
||||
|
||||
|
@ -238,6 +248,17 @@ class MatrixModule(BotModule):
|
|||
await bot.send_text(room, "Sending titles as text from now on.")
|
||||
return
|
||||
|
||||
# set blacklist
|
||||
elif len(args) == 2 and args[0] == "blacklist":
|
||||
bot.must_be_owner(event)
|
||||
if args[1] == 'clear':
|
||||
self.blacklist = []
|
||||
else:
|
||||
self.blacklist = args[1].split(',')
|
||||
bot.save_settings()
|
||||
await bot.send_text(room, f"Blacklisted URLs set to {self.blacklist}")
|
||||
return
|
||||
|
||||
# invalid command
|
||||
await bot.send_text(
|
||||
room,
|
||||
|
@ -250,6 +271,7 @@ class MatrixModule(BotModule):
|
|||
data = super().get_settings()
|
||||
data["status"] = self.status
|
||||
data["type"] = self.type
|
||||
data["blacklist"] = self.blacklist
|
||||
return data
|
||||
|
||||
def set_settings(self, data):
|
||||
|
@ -258,6 +280,8 @@ class MatrixModule(BotModule):
|
|||
self.status = data["status"]
|
||||
if data.get("type"):
|
||||
self.type = data["type"]
|
||||
if data.get("blacklist"):
|
||||
self.blacklist = data["blacklist"]
|
||||
|
||||
def help(self):
|
||||
return "If I see a url in a message I will try to get the title from the page and spit it out"
|
||||
|
|
Loading…
Reference in New Issue