From 9ce20216b5f11949be7c818c3f78d956bb5e7162 Mon Sep 17 00:00:00 2001
From: I-Al-Istannen
Date: Mon, 24 May 2021 18:32:18 +0200
Subject: [PATCH] Do not set a timeout for whole HTTP request

Downloads might take longer!
---
 PFERD/crawl/http_crawler.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py
index f0370a3..177972b 100644
--- a/PFERD/crawl/http_crawler.py
+++ b/PFERD/crawl/http_crawler.py
@@ -155,7 +155,15 @@ class HttpCrawler(Crawler):
         async with aiohttp.ClientSession(
                 headers={"User-Agent": f"{NAME}/{VERSION}"},
                 cookie_jar=self._cookie_jar,
-                timeout=ClientTimeout(total=self._http_timeout)
+                timeout=ClientTimeout(
+                    # 30 minutes. No download in the history of downloads was longer than 30 minutes.
+                    # This is enough to transfer a 600 MB file over a 3 Mib/s connection.
+                    # Allowing an arbitrary value could be annoying for overnight batch jobs
+                    total=30 * 60,
+                    connect=self._http_timeout,
+                    sock_connect=self._http_timeout,
+                    sock_read=self._http_timeout,
+                )
         ) as session:
             self.session = session
             try: