mirror of
https://github.com/Garmelon/PFERD.git
synced 2026-04-12 23:45:05 +02:00
Only call should_crawl once
This commit is contained in:
parent
81301f3a76
commit
8cfa818f04
1 changed file with 6 additions and 0 deletions
|
|
@@ -130,6 +130,12 @@ class KitIliasWebCrawler(HttpCrawler):
|
|||
@arepeat(3)
|
||||
@anoncritical
|
||||
async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None:
|
||||
# We might not want to crawl this directory-ish page.
|
||||
# This is not in #handle_element, as the download methods check it themselves and therefore
|
||||
# would perform this check twice - messing with the explain output
|
||||
if not self.should_crawl(path):
|
||||
return
|
||||
|
||||
tasks = []
|
||||
async with self.crawl_bar(path):
|
||||
soup = await self._get_page(url)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue