Lie user agent to fix Youtube. Fixes #115.
This commit is contained in:
parent
19aaa1a546
commit
9b6d8efaf7
|
@ -45,6 +45,9 @@ class MatrixModule(BotModule):
|
||||||
bot.client.add_event_callback(self.text_cb, RoomMessageText)
|
bot.client.add_event_callback(self.text_cb, RoomMessageText)
|
||||||
# extend the useragent string to contain version and bot name
|
# extend the useragent string to contain version and bot name
|
||||||
self.useragent = f"Mozilla/5.0 (compatible; Hemppa/{self.bot.version}; {self.bot.client.user}; +https://github.com/vranki/hemppa/)"
|
self.useragent = f"Mozilla/5.0 (compatible; Hemppa/{self.bot.version}; {self.bot.client.user}; +https://github.com/vranki/hemppa/)"
|
||||||
|
# Actually no - for example Youtube doesn't serve titles for proper Hemppa user agent!
|
||||||
|
# Lie and say we are generic Firefox. Blame Youtube..
|
||||||
|
self.useragent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
|
||||||
self.logger.debug(f"useragent: {self.useragent}")
|
self.logger.debug(f"useragent: {self.useragent}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -168,12 +171,18 @@ class MatrixModule(BotModule):
|
||||||
# try parse and get the title
|
# try parse and get the title
|
||||||
try:
|
try:
|
||||||
soup = BeautifulSoup(responsetext, "html.parser")
|
soup = BeautifulSoup(responsetext, "html.parser")
|
||||||
# Prefer og:title first (for example Youtube uses this)
|
|
||||||
ogtitle = soup.find("meta", property="og:title")
|
if soup.title and len(soup.title.string) > 0:
|
||||||
if ogtitle:
|
title = soup.title.string
|
||||||
title = ogtitle["content"]
|
else:
|
||||||
elif soup.head and soup.head.title:
|
title_tag = soup.find("meta", attrs={"name": "title"})
|
||||||
title = soup.head.title.string.strip()
|
ogtitle = soup.find("meta", property="og:title")
|
||||||
|
if title_tag:
|
||||||
|
title = descr_tag.get("content", None)
|
||||||
|
elif ogtitle:
|
||||||
|
title = ogtitle["content"]
|
||||||
|
elif soup.head and soup.head.title:
|
||||||
|
title = soup.head.title.string.strip()
|
||||||
descr_tag = soup.find("meta", attrs={"name": "description"})
|
descr_tag = soup.find("meta", attrs={"name": "description"})
|
||||||
if descr_tag:
|
if descr_tag:
|
||||||
description = descr_tag.get("content", None)
|
description = descr_tag.get("content", None)
|
||||||
|
@ -181,7 +190,7 @@ class MatrixModule(BotModule):
|
||||||
self.logger.warning(f"Failed parsing response from url {url}. Error: {e}")
|
self.logger.warning(f"Failed parsing response from url {url}. Error: {e}")
|
||||||
return (title, description)
|
return (title, description)
|
||||||
|
|
||||||
# Issue 63 patch - Title should not contain newlines or tabs
|
# Title should not contain newlines or tabs
|
||||||
if title is not None:
|
if title is not None:
|
||||||
assert isinstance(title, str)
|
assert isinstance(title, str)
|
||||||
title = title.replace("\n", "")
|
title = title.replace("\n", "")
|
||||||
|
|
Loading…
Reference in New Issue