mirror of
https://github.com/Garmelon/PFERD.git
synced 2026-04-12 23:45:05 +02:00
Use raw paths for --debug-transforms
Previously, the already-transformed paths were used, which meant that --debug-transforms was cumbersome to use (as you had to remove all transforms and crawl once before getting useful results).
This commit is contained in:
parent
64a2960751
commit
7b062883f6
2 changed files with 19 additions and 1 deletion
|
|
@@ -264,6 +264,7 @@ class Crawler(ABC):
|
|||
async def crawl(self, path: PurePath) -> Optional[CrawlToken]:
|
||||
log.explain_topic(f"Decision: Crawl {fmt_path(path)}")
|
||||
path = self._deduplicator.mark(path)
|
||||
self._output_dir.report.found(path)
|
||||
|
||||
if self._transformer.transform(path) is None:
|
||||
log.explain("Answer: No")
|
||||
|
|
@@ -282,6 +283,7 @@ class Crawler(ABC):
|
|||
) -> Optional[DownloadToken]:
|
||||
log.explain_topic(f"Decision: Download {fmt_path(path)}")
|
||||
path = self._deduplicator.mark(path)
|
||||
self._output_dir.report.found(path)
|
||||
|
||||
transformed_path = self._transformer.transform(path)
|
||||
if transformed_path is None:
|
||||
|
|
@@ -339,7 +341,7 @@ class Crawler(ABC):
|
|||
return
|
||||
|
||||
seen: Set[PurePath] = set()
|
||||
for known in sorted(self.prev_report.known_files):
|
||||
for known in sorted(self.prev_report.found_paths):
|
||||
looking_at = list(reversed(known.parents)) + [known]
|
||||
for path in looking_at:
|
||||
if path in seen:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue