From f47d2f11d843bfd3307815b231dd3e3df0265cef Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 Oct 2022 20:28:06 +0200 Subject: [PATCH] Append trailing slash to kit-ipd links to ensure urljoin works as expected --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7a9899..24d9fa6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Fixed - Forum crawling crashing when parsing empty (= 0 messages) threads - Forum crawling crashing when a forum has no threads at all +- kit-ipd crawler not working as expected if the URL did not end with a trailing slash ## 3.4.1 - 2022-08-17 diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index d9fac32..338e059 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -24,6 +24,9 @@ class KitIpdCrawlerSection(HttpCrawlerSection): if not target.startswith("https://"): self.invalid_value("target", target, "Should be a URL") + if not target.endswith("/"): + target = target + "/" + return target def link_regex(self) -> Pattern[str]: