From 9f7aff761d86405bd223f4594370bb3ed243d948 Mon Sep 17 00:00:00 2001 From: Toorero <22551563+Toorero@users.noreply.github.com> Date: Mon, 1 Nov 2021 00:22:28 +0100 Subject: [PATCH] fixed regex needs to match whole href --- PFERD/crawl/kit_ipd_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index ffeb734..6af22f6 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -28,7 +28,7 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target def link_regex(self) -> Pattern[AnyStr]: - regex = self.s.get("link_regex", ".*/[^/]*\.(?:pdf|zip|c|java)") + regex = self.s.get("link_regex", "^.*/[^/]*\.(?:pdf|zip|c|java)$") return re.compile(regex)