url: fix reading streaming responses forever, fixes #79
This commit is contained in:
parent
2cc98e3524
commit
b818353885
|
@ -4,7 +4,7 @@ from functools import lru_cache
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from nio import RoomMessageText, AsyncClient
|
from nio import RoomMessageText
|
||||||
|
|
||||||
from modules.common.module import BotModule
|
from modules.common.module import BotModule
|
||||||
|
|
||||||
|
@ -56,7 +56,7 @@ class MatrixModule(BotModule):
|
||||||
# skip edited content to prevent spamming the same thing multiple times
|
# skip edited content to prevent spamming the same thing multiple times
|
||||||
if "content" in event.source:
|
if "content" in event.source:
|
||||||
if "m.new_content" in event.source["content"]:
|
if "m.new_content" in event.source["content"]:
|
||||||
self.logger.debug(f"Skipping edited event to prevent spam")
|
self.logger.debug("Skipping edited event to prevent spam")
|
||||||
return
|
return
|
||||||
|
|
||||||
# are we on in this room?
|
# are we on in this room?
|
||||||
|
@ -114,9 +114,29 @@ class MatrixModule(BotModule):
|
||||||
"""
|
"""
|
||||||
title = None
|
title = None
|
||||||
description = None
|
description = None
|
||||||
|
# timeout will still handle network timeouts
|
||||||
timeout = httpx.Timeout(10.0, connect=2.0, read=5.0)
|
timeout = httpx.Timeout(10.0, connect=2.0, read=5.0)
|
||||||
|
responsetext = "" # read our response here
|
||||||
try:
|
try:
|
||||||
r = httpx.get(url, timeout=timeout)
|
self.logger.debug(f"start streaming {url}")
|
||||||
|
# stream the response so that we can set an upper limit on how much we want to fetch.
|
||||||
|
# as we are streaming, r.text won't be available, so we save the data we read ourselves
|
||||||
|
|
||||||
|
# maximum number of characters to read from the response (this prevents us from reading the stream forever)
|
||||||
|
maxsize = 100000
|
||||||
|
with httpx.stream("GET", url, timeout=timeout) as r:
|
||||||
|
for part in r.iter_text():
|
||||||
|
self.logger.debug(
|
||||||
|
f"reading response stream, limiting in {maxsize} bytes"
|
||||||
|
)
|
||||||
|
|
||||||
|
responsetext += part
|
||||||
|
maxsize -= len(part)
|
||||||
|
|
||||||
|
if maxsize < 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
self.logger.debug(f"end streaming {url}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Failed fetching url {url}. Error: {e}")
|
self.logger.warning(f"Failed fetching url {url}. Error: {e}")
|
||||||
return (title, description)
|
return (title, description)
|
||||||
|
@ -129,7 +149,7 @@ class MatrixModule(BotModule):
|
||||||
|
|
||||||
# try parse and get the title
|
# try parse and get the title
|
||||||
try:
|
try:
|
||||||
soup = BeautifulSoup(r.text, "html.parser")
|
soup = BeautifulSoup(responsetext, "html.parser")
|
||||||
# Prefer og:title first (for example Youtube uses this)
|
# Prefer og:title first (for example Youtube uses this)
|
||||||
ogtitle = soup.find("meta", property="og:title")
|
ogtitle = soup.find("meta", property="og:title")
|
||||||
if ogtitle:
|
if ogtitle:
|
||||||
|
|
Loading…
Reference in New Issue