Catch errors when requesting resource version

This commit is contained in:
Scriptim 2024-10-27 17:04:58 +01:00
parent eb80c67535
commit 2193adadb4
No known key found for this signature in database
GPG key ID: 1ABB18EA42CCAAF6

View file

@ -198,23 +198,26 @@ class HttpCrawler(Crawler):
Requests the ETag and Last-Modified headers of a resource via a HEAD request. Requests the ETag and Last-Modified headers of a resource via a HEAD request.
If no entity tag / modification date can be obtained, the corresponding value will be None. If no entity tag / modification date can be obtained, the corresponding value will be None.
""" """
async with self.session.head(resource_url) as resp: try:
if resp.status != 200: async with self.session.head(resource_url) as resp:
return None, None if resp.status != 200:
return None, None
etag_header = resp.headers.get("ETag") etag_header = resp.headers.get("ETag")
last_modified_header = resp.headers.get("Last-Modified") last_modified_header = resp.headers.get("Last-Modified")
if last_modified_header: if last_modified_header:
try: try:
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified#directives # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified#directives
datetime_format = "%a, %d %b %Y %H:%M:%S GMT" datetime_format = "%a, %d %b %Y %H:%M:%S GMT"
last_modified = datetime.strptime(last_modified_header, datetime_format) last_modified = datetime.strptime(last_modified_header, datetime_format)
except ValueError: except ValueError:
# last_modified remains None # last_modified remains None
pass pass
return etag_header, last_modified return etag_header, last_modified
except aiohttp.ClientError:
return None, None
async def run(self) -> None: async def run(self) -> None:
self._request_count = 0 self._request_count = 0