mirror of
https://github.com/Garmelon/PFERD.git
synced 2026-04-12 15:35:05 +02:00
Simplified default regex
This commit is contained in:
parent
13d4835d70
commit
deadfb2bf8
1 changed file with 1 addition and 1 deletion
|
|
@@ -125,7 +125,7 @@ class KitIpdCrawler(HttpCrawler):
|
|||
def _fetch_file_regex(self) -> re.Pattern:
|
||||
if "link_regex" in self._config:
|
||||
return re.compile(self._config["link_regex"])
|
||||
return re.compile(".*\/[^\/]*\.(?:(?:pdf)|(?:zip)|(?:c)|(?:java))")
|
||||
return re.compile(".*\/[^\/]*\.(?:pdf|zip|c|java)")
|
||||
def _abs_url_from_link(self, link_tag: Tag) -> str:
|
||||
return urljoin(self._url, link_tag.get("href"))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue