From 38d4f5b4c985f1d865f1538b15fe9b436de19970 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 3 Nov 2020 20:09:54 +0100 Subject: [PATCH 001/524] Do not fail only empty courses --- PFERD/organizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PFERD/organizer.py b/PFERD/organizer.py index 1665f23..87bc684 100644 --- a/PFERD/organizer.py +++ b/PFERD/organizer.py @@ -124,6 +124,8 @@ class Organizer(Location): self._cleanup(self.path) def _cleanup(self, start_dir: Path) -> None: + if not start_dir.exists(): + return paths: List[Path] = list(start_dir.iterdir()) # Recursively clean paths From f4abe3197ca976dfa8d075225dd5c17e9bde0d63 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 3 Nov 2020 20:40:09 +0100 Subject: [PATCH 002/524] Add ipd crawler --- PFERD/ipd.py | 150 +++++++++++++++++++++++++++++++++++++++++++++++++ PFERD/pferd.py | 51 +++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 PFERD/ipd.py diff --git a/PFERD/ipd.py b/PFERD/ipd.py new file mode 100644 index 0000000..33aaff1 --- /dev/null +++ b/PFERD/ipd.py @@ -0,0 +1,150 @@ +""" +Utility functions and a scraper/downloader for the IPD pages. +""" +import datetime +import logging +import math +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, List, Optional +from urllib.parse import urljoin + +import bs4 +import requests + +from PFERD.errors import FatalException +from PFERD.utils import soupify + +from .logging import PrettyLogger +from .organizer import Organizer +from .tmp_dir import TmpDir +from .transform import Transformable +from .utils import stream_to_path + +LOGGER = logging.getLogger(__name__) +PRETTY = PrettyLogger(LOGGER) + + +@dataclass +class IpdDownloadInfo(Transformable): + """ + Information about an ipd entry. 
+ """ + url: str + modification_date: Optional[datetime.datetime] + + +IpdDownloadStrategy = Callable[[Organizer, IpdDownloadInfo], bool] + + +def ipd_download_new_or_modified(organizer: Organizer, info: IpdDownloadInfo) -> bool: + """ + Accepts new files or files with a more recent modification date. + """ + resolved_file = organizer.resolve(info.path) + if not resolved_file.exists(): + return True + if not info.modification_date: + PRETTY.ignored_file(info.path, "could not find modification time, file exists") + return False + + resolved_mod_time_seconds = resolved_file.stat().st_mtime + + # Download if the info is newer + if info.modification_date.timestamp() > resolved_mod_time_seconds: + return True + + PRETTY.ignored_file(info.path, "local file has newer or equal modification time") + return False + + +class IpdCrawler: + # pylint: disable=too-few-public-methods + """ + A crawler for IPD pages. + """ + + def __init__(self, base_url: str): + self._base_url = base_url + + def _abs_url_from_link(self, link_tag: bs4.Tag) -> str: + """ + Create an absolute url from an tag. + """ + return urljoin(self._base_url, link_tag.get("href")) + + def crawl(self) -> List[IpdDownloadInfo]: + """ + Crawls the playlist given in the constructor. + """ + page = soupify(requests.get(self._base_url)) + + items: List[IpdDownloadInfo] = [] + + for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}): + href: str = link.attrs.get("href") + name = href.split("/")[-1] + + modification_date: Optional[datetime.datetime] + try: + enclosing_row: bs4.Tag = link.findParent(name="tr") + date_text = enclosing_row.find(name="td").text + modification_date = datetime.datetime.strptime(date_text, "%d.%m.%Y") + except ValueError: + modification_date = None + + items.append(IpdDownloadInfo( + Path(name), + url=self._abs_url_from_link(link), + modification_date=modification_date + )) + + return items + + +class IpdDownloader: + """ + A downloader for ipd files. 
+ """ + + def __init__(self, tmp_dir: TmpDir, organizer: Organizer, strategy: IpdDownloadStrategy): + self._tmp_dir = tmp_dir + self._organizer = organizer + self._strategy = strategy + self._session = requests.session() + + def download_all(self, infos: List[IpdDownloadInfo]) -> None: + """ + Download multiple files one after the other. + """ + for info in infos: + self.download(info) + + def download(self, info: IpdDownloadInfo) -> None: + """ + Download a single file. + """ + if not self._strategy(self._organizer, info): + self._organizer.mark(info.path) + return + + with self._session.get(info.url, stream=True) as response: + if response.status_code == 200: + tmp_file = self._tmp_dir.new_path() + stream_to_path(response, tmp_file, info.path.name) + dst_path = self._organizer.accept_file(tmp_file, info.path) + + if dst_path and info.modification_date: + os.utime( + dst_path, + times=( + math.ceil(info.modification_date.timestamp()), + math.ceil(info.modification_date.timestamp()) + ) + ) + + elif response.status_code == 403: + raise FatalException("Received 403. 
Are you not using the KIT VPN?") + else: + PRETTY.warning(f"Could not download file, got response {response.status_code}") diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 042dd93..f57f078 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -14,6 +14,8 @@ from .errors import FatalException, swallow_and_print_errors from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter, IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy, KitShibbolethAuthenticator, download_modified_or_new) +from .ipd import (IpdCrawler, IpdDownloader, IpdDownloadInfo, + IpdDownloadStrategy, ipd_download_new_or_modified) from .location import Location from .logging import PrettyLogger, enable_logging from .organizer import Organizer @@ -294,6 +296,55 @@ class Pferd(Location): return organizer + @swallow_and_print_errors + def ipd_kit( + self, + target: Union[PathLike, Organizer], + url: str, + transform: Transform = lambda x: x, + download_strategy: IpdDownloadStrategy = ipd_download_new_or_modified, + clean: bool = True + ) -> Organizer: + """ + Synchronizes a folder with a DIVA playlist. + + Arguments: + target {Union[PathLike, Organizer]} -- The organizer / target folder to use. + url {str} -- the url to the page + + Keyword Arguments: + transform {Transform} -- A transformation function for the output paths. Return None + to ignore a file. (default: {lambdax:x}) + download_strategy {DivaDownloadStrategy} -- A function to determine which files need to + be downloaded. Can save bandwidth and reduce the number of requests. + (default: {diva_download_new}) + clean {bool} -- Whether to clean up when the method finishes. 
+ """ + tmp_dir = self._tmp_dir.new_subdir() + + if target is None: + PRETTY.starting_synchronizer("None", "IPD", url) + raise FatalException("Got 'None' as target directory, aborting") + + if isinstance(target, Organizer): + organizer = target + else: + organizer = Organizer(self.resolve(to_path(target))) + + PRETTY.starting_synchronizer(organizer.path, "IPD", url) + + elements: List[IpdDownloadInfo] = IpdCrawler(url).crawl() + transformed = apply_transform(transform, elements) + + if self._test_run: + self._print_transformables(transformed) + return organizer + + downloader = IpdDownloader(tmp_dir=tmp_dir, organizer=organizer, strategy=download_strategy) + downloader.download_all(transformed) + + return organizer + @swallow_and_print_errors def diva_kit( self, From 0da2fafcd8b7dbe258775cb5ff4f84fa671b846f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 Nov 2020 14:38:15 +0100 Subject: [PATCH 003/524] Fix links outside tables --- PFERD/ipd.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/PFERD/ipd.py b/PFERD/ipd.py index 33aaff1..d602e0e 100644 --- a/PFERD/ipd.py +++ b/PFERD/ipd.py @@ -86,11 +86,12 @@ class IpdCrawler: href: str = link.attrs.get("href") name = href.split("/")[-1] - modification_date: Optional[datetime.datetime] + modification_date: Optional[datetime.datetime] = None try: enclosing_row: bs4.Tag = link.findParent(name="tr") - date_text = enclosing_row.find(name="td").text - modification_date = datetime.datetime.strptime(date_text, "%d.%m.%Y") + if enclosing_row: + date_text = enclosing_row.find(name="td").text + modification_date = datetime.datetime.strptime(date_text, "%d.%m.%Y") except ValueError: modification_date = None From ef343dec7c9ad7554ffbaf0b7301ce99666caaa9 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 Nov 2020 15:06:58 +0100 Subject: [PATCH 004/524] Merge organizer download summaries --- PFERD/pferd.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/PFERD/pferd.py 
b/PFERD/pferd.py index f57f078..c01b5fd 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -343,6 +343,11 @@ class Pferd(Location): downloader = IpdDownloader(tmp_dir=tmp_dir, organizer=organizer, strategy=download_strategy) downloader.download_all(transformed) + if clean: + organizer.cleanup() + + self._download_summary.merge(organizer.download_summary) + return organizer @swallow_and_print_errors @@ -403,4 +408,6 @@ class Pferd(Location): if clean: organizer.cleanup() + self._download_summary.merge(organizer.download_summary) + return organizer From f830b42a3600519393341c6e720ae4612cbad75a Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 Nov 2020 21:49:35 +0100 Subject: [PATCH 005/524] Fix duplicate files in download summary --- PFERD/download_summary.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PFERD/download_summary.py b/PFERD/download_summary.py index 28d51b5..c8135de 100644 --- a/PFERD/download_summary.py +++ b/PFERD/download_summary.py @@ -40,9 +40,9 @@ class DownloadSummary: """ Merges ourselves with the passed summary. Modifies this object, but not the passed one. 
""" - self._new_files += summary.new_files - self._modified_files += summary.modified_files - self._deleted_files += summary.deleted_files + self._new_files = list(set(self._new_files + summary.new_files)) + self._modified_files = list(set(self._modified_files + summary.modified_files)) + self._deleted_files = list(set(self._deleted_files + summary.deleted_files)) def add_deleted_file(self, path: Path) -> None: """ From 6f78fef6047886c5ce3a3aa1b0bee99157a554d7 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 Nov 2020 22:08:33 +0100 Subject: [PATCH 006/524] Add quoting instructions to README --- .github/workflows/package.yml | 2 +- README.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index c451789..c217735 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -67,7 +67,7 @@ jobs: - name: "Upload release artifacts" uses: softprops/action-gh-release@v1 with: - body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x `." + body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x `. Also please *quote the url you pass to the program* or your shell might silently screw it up!" files: | pferd_sync_url_mac pferd_sync_url_linux diff --git a/README.md b/README.md index a1cd1dd..d82f557 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,8 @@ use, but doesn't expose all the configuration options and tweaks a full install does. 1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest). -2. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option. +2. 
Recognize that you most likely need to enclose the URL in `''` quotes to prevent your shell from interpreting `&` and other symbols +3. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option. If you are on **Linux/Mac**, you need to *make the file executable* using `chmod +x `. If you are on **Mac**, you need to allow this unverified program to run (see e.g. [here](https://www.switchingtomac.com/tutorials/osx/how-to-run-unverified-apps-on-macos/)) From 6f30adcd2292556c5906976230dc27e7722417ad Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 Nov 2020 22:12:33 +0100 Subject: [PATCH 007/524] Fix quote type in README --- .github/workflows/package.yml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index c217735..1c0c353 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -67,7 +67,7 @@ jobs: - name: "Upload release artifacts" uses: softprops/action-gh-release@v1 with: - body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x `. Also please *quote the url you pass to the program* or your shell might silently screw it up!" + body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x `. Also please enclose the *url you pass to the program in double quotes* or your shell might silently screw it up!" files: | pferd_sync_url_mac pferd_sync_url_linux diff --git a/README.md b/README.md index d82f557..2df0722 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ use, but doesn't expose all the configuration options and tweaks a full install does. 1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest). 
-2. Recognize that you most likely need to enclose the URL in `''` quotes to prevent your shell from interpreting `&` and other symbols +2. Recognize that you most likely need to enclose the URL in `""` quotes to prevent your shell from interpreting `&` and other symbols 3. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option. If you are on **Linux/Mac**, you need to *make the file executable* using `chmod +x `. If you are on **Mac**, you need to allow this unverified program to run (see e.g. [here](https://www.switchingtomac.com/tutorials/osx/how-to-run-unverified-apps-on-macos/)) From 316b9d7bf4bfd864fa9ca8cb8fb3c2ca995d137f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 Nov 2020 22:20:40 +0100 Subject: [PATCH 008/524] Prevent too many retries when fetching an ILIAS page --- PFERD/ilias/crawler.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index 7ce460e..036a479 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -593,10 +593,17 @@ class IliasCrawler: return results - def _get_page(self, url: str, params: Dict[str, Any]) -> bs4.BeautifulSoup: + def _get_page(self, url: str, params: Dict[str, Any], + retry_count: int = 0) -> bs4.BeautifulSoup: """ Fetches a page from ILIAS, authenticating when needed. """ + + if retry_count >= 4: + raise FatalException("Could not get a proper page after 4 tries. 
" + "Maybe your URL is wrong, authentication fails continuously, " + "your ILIAS connection is spotty or ILIAS is not well.") + LOGGER.debug("Fetching %r", url) response = self._session.get(url, params=params) @@ -617,7 +624,7 @@ class IliasCrawler: self._authenticator.authenticate(self._session) - return self._get_page(url, params) + return self._get_page(url, params, retry_count + 1) @staticmethod def _is_logged_in(soup: bs4.BeautifulSoup) -> bool: From 9c4759103a0d80b17161f58ee8776d2409c46999 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 Nov 2020 11:25:06 +0100 Subject: [PATCH 009/524] Bump patch version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2df0722..babd760 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.1 +$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2 ``` The use of [venv] is recommended. 
@@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.1 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.1/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.2/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index bac40d9..526669a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.4.1", + version="2.4.2", packages=find_packages(), install_requires=[ "requests>=2.21.0", From f2aba970fd572161f614f773916f85d03d8dc34d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 10 Nov 2020 15:27:12 +0100 Subject: [PATCH 010/524] [sync_url] Sanitize path names on windows --- sync_url.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sync_url.py b/sync_url.py index d2dce94..cb837a2 100755 --- a/sync_url.py +++ b/sync_url.py @@ -5,16 +5,27 @@ A simple script to download a course by name from ILIAS. 
""" import argparse -from pathlib import Path +import os +import re +from pathlib import Path, PurePath +from typing import Optional from urllib.parse import urlparse from PFERD import Pferd from PFERD.cookie_jar import CookieJar from PFERD.ilias import (IliasCrawler, IliasElementType, KitShibbolethAuthenticator) +from PFERD.transform import re_rename from PFERD.utils import to_path +def sanitize_path(path: PurePath) -> Optional[PurePath]: + # Escape windows illegal path characters + if os.name == 'nt': + return PurePath(re.sub(r'[<>:"/\\|?]', "", str(path))) + return path + + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--test-run", action="store_true") @@ -59,7 +70,8 @@ def main() -> None: target=folder, full_url=args.url, cookies=args.cookies, - dir_filter=dir_filter + dir_filter=dir_filter, + transform=sanitize_path ) From 4ac51048c115e2dfc4c04228cda65b4c16346daf Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 10 Nov 2020 20:49:14 +0100 Subject: [PATCH 011/524] Use "_" as a replacement for illegal characters --- sync_url.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sync_url.py b/sync_url.py index cb837a2..ebb635b 100755 --- a/sync_url.py +++ b/sync_url.py @@ -15,14 +15,13 @@ from PFERD import Pferd from PFERD.cookie_jar import CookieJar from PFERD.ilias import (IliasCrawler, IliasElementType, KitShibbolethAuthenticator) -from PFERD.transform import re_rename from PFERD.utils import to_path def sanitize_path(path: PurePath) -> Optional[PurePath]: # Escape windows illegal path characters if os.name == 'nt': - return PurePath(re.sub(r'[<>:"/\\|?]', "", str(path))) + return PurePath(re.sub(r'[<>:"/\\|?]', "_", str(path))) return path From 733e1ae136d69e9885046858cc907970dc6884be Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 10 Nov 2020 20:50:31 +0100 Subject: [PATCH 012/524] Bump version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/README.md b/README.md index babd760..e35f209 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2 +$ pip install git+https://github.com/Garmelon/PFERD@v2.4.3 ``` The use of [venv] is recommended. @@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.2 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.2/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.4.3 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.3/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index 526669a..8335f7f 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.4.2", + version="2.4.3", packages=find_packages(), install_requires=[ "requests>=2.21.0", From 1486a63854d38662035630efbccb29d5ccb931a9 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 10 Nov 2020 22:53:47 +0100 Subject: [PATCH 013/524] Do not collapse directory structure when sanitizing --- sync_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sync_url.py b/sync_url.py index ebb635b..998584f 100755 --- a/sync_url.py +++ b/sync_url.py @@ -21,7 +21,7 @@ from PFERD.utils import to_path def sanitize_path(path: PurePath) -> Optional[PurePath]: # Escape windows illegal path characters if os.name == 'nt': - return PurePath(re.sub(r'[<>:"/\\|?]', "_", str(path))) + return PurePath(re.sub(r'[<>:"/|?]', "_", str(path))) return path From a0ae9aee2730a75558d525bdb1d36f89f9da27ae Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 11 Nov 2020 09:36:20 +0100 Subject: [PATCH 014/524] 
Sanitize individual path parts --- sync_url.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sync_url.py b/sync_url.py index 998584f..2b8bc56 100755 --- a/sync_url.py +++ b/sync_url.py @@ -21,7 +21,8 @@ from PFERD.utils import to_path def sanitize_path(path: PurePath) -> Optional[PurePath]: # Escape windows illegal path characters if os.name == 'nt': - return PurePath(re.sub(r'[<>:"/|?]', "_", str(path))) + sanitized_parts = [re.sub(r'[<>:"/|?]', "_", x) for x in list(path.parts)] + return PurePath(*sanitized_parts) return path From 55e9e719ad405171d7f8de66a4831bd7f659d9fe Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 12 Nov 2020 19:32:45 +0100 Subject: [PATCH 015/524] Sanitize "/" in ilias path names --- PFERD/ilias/crawler.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index 036a479..f5b1ae8 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -26,6 +26,10 @@ LOGGER = logging.getLogger(__name__) PRETTY = PrettyLogger(LOGGER) +def _sanitize_path_name(name: str) -> str: + return name.replace("/", "-") + + class IliasElementType(Enum): """ The type of an ilias element. @@ -260,7 +264,7 @@ class IliasCrawler: links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle") for link in links: abs_url = self._abs_url_from_link(link) - element_path = Path(folder_path, link.getText().strip()) + element_path = Path(folder_path, _sanitize_path_name(link.getText().strip())) element_type = self._find_type_from_link(element_path, link, abs_url) if element_type == IliasElementType.REGULAR_FILE: @@ -377,7 +381,7 @@ class IliasCrawler: modification_date = demangle_date(modification_date_str) # Grab the name from the link text - name = link_element.getText() + name = _sanitize_path_name(link_element.getText()) full_path = Path(path, name + "." 
+ file_type) return [ @@ -508,7 +512,7 @@ class IliasCrawler: ).getText().strip() title += ".mp4" - video_path: Path = Path(parent_path, title) + video_path: Path = Path(parent_path, _sanitize_path_name(title)) video_url = self._abs_url_from_link(link) @@ -580,6 +584,7 @@ class IliasCrawler: # Two divs, side by side. Left is the name, right is the link ==> get left # sibling file_name = file_link.parent.findPrevious(name="div").getText().strip() + file_name = _sanitize_path_name(file_name) url = self._abs_url_from_link(file_link) LOGGER.debug("Found file %r at %r", file_name, url) From 98834c9c951c5e7eff26986004ddc54059bcc785 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 12 Nov 2020 20:23:36 +0100 Subject: [PATCH 016/524] Bump version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e35f209..388f9a4 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.3 +$ pip install git+https://github.com/Garmelon/PFERD@v2.4.4 ``` The use of [venv] is recommended. 
@@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.3 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.3/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.4.4 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.4/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index 8335f7f..05fe3c2 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.4.3", + version="2.4.4", packages=find_packages(), install_requires=[ "requests>=2.21.0", From cd90a60dee340057b75196c01f70d79a504c6fe7 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 12 Nov 2020 20:52:46 +0100 Subject: [PATCH 017/524] Move "sanitize_windows_path" to PFERD.transform --- PFERD/transform.py | 17 ++++++++++++++++- sync_url.py | 16 +++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/PFERD/transform.py b/PFERD/transform.py index 16769df..7a05dd1 100644 --- a/PFERD/transform.py +++ b/PFERD/transform.py @@ -5,6 +5,8 @@ only files whose names match a regex, or renaming files from one numbering scheme to another. """ +import os +import re from dataclasses import dataclass from pathlib import PurePath from typing import Callable, List, Optional, TypeVar @@ -45,7 +47,8 @@ def apply_transform( # Transform combinators -keep = lambda path: path +def keep(path: PurePath) -> Optional[PurePath]: + return path def attempt(*args: Transform) -> Transform: def inner(path: PurePath) -> Optional[PurePath]: @@ -125,3 +128,15 @@ def re_rename(regex: Regex, target: str) -> Transform: return path.with_name(target.format(*groups)) return None return inner + + +def sanitize_windows_path(path: PurePath) -> Optional[PurePath]: + """ + A small function to escape characters that are forbidden in windows path names. 
+ This method is a no-op on other operating systems. + """ + # Escape windows illegal path characters + if os.name == 'nt': + sanitized_parts = [re.sub(r'[<>:"/|?]', "_", x) for x in list(path.parts)] + return PurePath(*sanitized_parts) + return path diff --git a/sync_url.py b/sync_url.py index 2b8bc56..ddd239a 100755 --- a/sync_url.py +++ b/sync_url.py @@ -5,27 +5,17 @@ A simple script to download a course by name from ILIAS. """ import argparse -import os -import re -from pathlib import Path, PurePath -from typing import Optional +from pathlib import Path from urllib.parse import urlparse from PFERD import Pferd from PFERD.cookie_jar import CookieJar from PFERD.ilias import (IliasCrawler, IliasElementType, KitShibbolethAuthenticator) +from PFERD.transform import sanitize_windows_path from PFERD.utils import to_path -def sanitize_path(path: PurePath) -> Optional[PurePath]: - # Escape windows illegal path characters - if os.name == 'nt': - sanitized_parts = [re.sub(r'[<>:"/|?]', "_", x) for x in list(path.parts)] - return PurePath(*sanitized_parts) - return path - - def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--test-run", action="store_true") @@ -71,7 +61,7 @@ def main() -> None: full_url=args.url, cookies=args.cookies, dir_filter=dir_filter, - transform=sanitize_path + transform=sanitize_windows_path ) From 8ebf0eab169a72eb0f82e410a2391886e3a6aeb1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 17 Nov 2020 21:36:04 +0100 Subject: [PATCH 018/524] Sort download summary --- PFERD/download_summary.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/PFERD/download_summary.py b/PFERD/download_summary.py index c8135de..3b9a024 100644 --- a/PFERD/download_summary.py +++ b/PFERD/download_summary.py @@ -5,6 +5,12 @@ from pathlib import Path from typing import List +def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]: + tmp = list(set(first + second)) + tmp.sort(key=lambda x: 
str(x.resolve())) + return tmp + + class DownloadSummary: """ Keeps track of all new, modified or deleted files and provides a summary. @@ -40,9 +46,9 @@ class DownloadSummary: """ Merges ourselves with the passed summary. Modifies this object, but not the passed one. """ - self._new_files = list(set(self._new_files + summary.new_files)) - self._modified_files = list(set(self._modified_files + summary.modified_files)) - self._deleted_files = list(set(self._deleted_files + summary.deleted_files)) + self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files) + self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files) + self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files) def add_deleted_file(self, path: Path) -> None: """ From ba9215ebe81e67940f88c52eb1a42b2dc480661b Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 18 Nov 2020 10:09:45 +0100 Subject: [PATCH 019/524] Bump version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 388f9a4..3a877c1 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.4 +$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5 ``` The use of [venv] is recommended. 
@@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.4 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.4/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.5/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index 05fe3c2..9b226f8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.4.4", + version="2.4.5", packages=find_packages(), install_requires=[ "requests>=2.21.0", From ba3c7f85fae0e046889a5579586281df83999ff8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 19 Nov 2020 19:37:28 +0100 Subject: [PATCH 020/524] Replace "\" in ILIAS paths as well I am not sure whether anybody really uses a backslash in their names, but I guess it can't hurt to do this for windows users. 
--- PFERD/ilias/crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index f5b1ae8..2e37e36 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -27,7 +27,7 @@ PRETTY = PrettyLogger(LOGGER) def _sanitize_path_name(name: str) -> str: - return name.replace("/", "-") + return name.replace("/", "-").replace("\\", "-") class IliasElementType(Enum): From 9cbea5fe06b81f37ce50c871b127b17648493367 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 23 Nov 2020 10:16:40 +0100 Subject: [PATCH 021/524] Add requirements.txt --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f851c23 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +requests>=2.21.0 +beautifulsoup4>=4.7.1 +rich>=2.1.0 From ecdbca8fb6f40aed2ffce34b6aeb69643e35edd3 Mon Sep 17 00:00:00 2001 From: Christophe Date: Wed, 2 Dec 2020 16:50:30 +0100 Subject: [PATCH 022/524] Make sync_url work relative to cwd like sane programs --- sync_url.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/sync_url.py b/sync_url.py index ddd239a..c327b9d 100755 --- a/sync_url.py +++ b/sync_url.py @@ -35,19 +35,15 @@ def main() -> None: cookie_jar.load_cookies() - if args.folder is not None: - folder = args.folder - # Initialize pferd at the *parent of the passed folder* - # This is needed so Pferd's internal protections against escaping the working directory - # do not trigger (e.g. 
if somebody names a file in ILIAS '../../bad thing.txt') - pferd = Pferd(Path(Path(__file__).parent, folder).parent, test_run=args.test_run) - else: - # fetch course name from ilias + folder = args.folder + if args.folder is None: folder = crawler.find_element_name(args.url) cookie_jar.save_cookies() - # Initialize pferd at the location of the script - pferd = Pferd(Path(__file__).parent, test_run=args.test_run) + # files may not escape the pferd_root with relative paths + # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path + pferd_root = Path(Path.cwd(), Path(folder)).parent + pferd = Pferd(pferd_root, test_run=args.test_run) def dir_filter(_: Path, element: IliasElementType) -> bool: if args.no_videos: From f3a46634913d4b9d7c21389195d8b10eb5488017 Mon Sep 17 00:00:00 2001 From: Christophe Date: Wed, 2 Dec 2020 16:58:36 +0100 Subject: [PATCH 023/524] Add passive/no_prompt flag --- PFERD/organizer.py | 17 +++++++++++++---- PFERD/pferd.py | 7 +++++-- sync_url.py | 5 ++++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/PFERD/organizer.py b/PFERD/organizer.py index 87bc684..346df76 100644 --- a/PFERD/organizer.py +++ b/PFERD/organizer.py @@ -26,7 +26,7 @@ class FileAcceptException(Exception): class Organizer(Location): """A helper for managing downloaded files.""" - def __init__(self, path: Path): + def __init__(self, path: Path, no_prompt: bool = False): """Create a new organizer for a given path.""" super().__init__(path) self._known_files: Set[Path] = set() @@ -36,6 +36,8 @@ class Organizer(Location): self.download_summary = DownloadSummary() + self.not_prompting = no_prompt + def accept_file(self, src: Path, dst: PurePath) -> Optional[Path]: """ Move a file to this organizer and mark it. 
@@ -67,13 +69,18 @@ class Organizer(Location): if self._is_marked(dst): PRETTY.warning(f"File {str(dst_absolute)!r} was already written!") - if not prompt_yes_no(f"Overwrite file?", default=False): + default_action: bool = False + if self.not_prompting and not default_action \ + or not self.not_prompting and not prompt_yes_no(f"Overwrite file?", default=default_action): PRETTY.ignored_file(dst_absolute, "file was written previously") return None # Destination file is directory if dst_absolute.exists() and dst_absolute.is_dir(): - if prompt_yes_no(f"Overwrite folder {dst_absolute} with file?", default=False): + default_action: bool = False + if self.not_prompting and default_action \ + or not self.not_prompting \ + and prompt_yes_no(f"Overwrite folder {dst_absolute} with file?", default=default_action): shutil.rmtree(dst_absolute) else: PRETTY.warning(f"Could not add file {str(dst_absolute)!r}") @@ -144,6 +151,8 @@ class Organizer(Location): def _delete_file_if_confirmed(self, path: Path) -> None: prompt = f"Do you want to delete {path}" - if prompt_yes_no(prompt, False): + default_action: bool = False + if self.not_prompting and default_action or \ + not self.not_prompting and prompt_yes_no(prompt, default_action): self.download_summary.add_deleted_file(path) path.unlink() diff --git a/PFERD/pferd.py b/PFERD/pferd.py index c01b5fd..57b15f6 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -76,12 +76,13 @@ class Pferd(Location): download_strategy: IliasDownloadStrategy, timeout: int, clean: bool = True, + no_prompt: bool = None ) -> Organizer: # pylint: disable=too-many-locals cookie_jar = CookieJar(to_path(cookies) if cookies else None) session = cookie_jar.create_session() tmp_dir = self._tmp_dir.new_subdir() - organizer = Organizer(self.resolve(to_path(target))) + organizer = Organizer(self.resolve(to_path(target)), no_prompt if no_prompt is not None else False) crawler = IliasCrawler(base_url, session, authenticator, dir_filter) downloader = 
IliasDownloader(tmp_dir, organizer, session, @@ -245,6 +246,7 @@ class Pferd(Location): download_strategy: IliasDownloadStrategy = download_modified_or_new, clean: bool = True, timeout: int = 5, + no_prompt: bool = None ) -> Organizer: """ Synchronizes a folder with a given folder on the ILIAS instance of the KIT. @@ -289,7 +291,8 @@ class Pferd(Location): transform=transform, download_strategy=download_strategy, clean=clean, - timeout=timeout + timeout=timeout, + no_prompt=no_prompt ) self._download_summary.merge(organizer.download_summary) diff --git a/sync_url.py b/sync_url.py index c327b9d..c2ffb93 100755 --- a/sync_url.py +++ b/sync_url.py @@ -21,6 +21,8 @@ def main() -> None: parser.add_argument("--test-run", action="store_true") parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in") parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") + parser.add_argument('-p', '--passive', action="store_true", + help="Don't prompt for confirmations and use sane defaults") parser.add_argument('url', help="URL to the course page") parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into") args = parser.parse_args() @@ -57,7 +59,8 @@ def main() -> None: full_url=args.url, cookies=args.cookies, dir_filter=dir_filter, - transform=sanitize_windows_path + transform=sanitize_windows_path, + no_prompt=args.passive ) From 49a0ca7a7c149399c93014a0de9769fd20e050bf Mon Sep 17 00:00:00 2001 From: Christophe Date: Wed, 2 Dec 2020 16:59:29 +0100 Subject: [PATCH 024/524] Add myself to LICENSE This should've been done back when I added a PR for adding sync_url but people are lazy smh. 
--- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 2e3fa8c..26bcc0a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw +Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in From 6426060804e0212795b59adff85dbced8cff4b5b Mon Sep 17 00:00:00 2001 From: Christophe Date: Wed, 2 Dec 2020 18:40:45 +0100 Subject: [PATCH 025/524] Fix relative paths bug Introduced in 74ea03945876c94c260b590e6140a7ee50630477 --- sync_url.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sync_url.py b/sync_url.py index c2ffb93..14c2c9e 100755 --- a/sync_url.py +++ b/sync_url.py @@ -37,14 +37,15 @@ def main() -> None: cookie_jar.load_cookies() - folder = args.folder + folder = Path(args.folder) if args.folder is None: - folder = crawler.find_element_name(args.url) + folder = Path(crawler.find_element_name(args.url)) cookie_jar.save_cookies() # files may not escape the pferd_root with relative paths # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path pferd_root = Path(Path.cwd(), Path(folder)).parent + folder = folder.name pferd = Pferd(pferd_root, test_run=args.test_run) def dir_filter(_: Path, element: IliasElementType) -> bool: From 9f6dc56a7b88104a726af4059a2f709209ce54ea Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 2 Dec 2020 19:29:52 +0100 Subject: [PATCH 026/524] Use a strategy to decide conflict resolution --- PFERD/organizer.py | 54 +++++++++++++++++++++++++++++++++++----------- PFERD/pferd.py | 38 ++++++++++++++++++++++---------- sync_url.py | 34 +++++++++++++++++++++++------ 3 files changed, 96 insertions(+), 30 deletions(-) diff --git a/PFERD/organizer.py b/PFERD/organizer.py index 346df76..f63e92a 100644 --- 
a/PFERD/organizer.py +++ b/PFERD/organizer.py @@ -7,8 +7,9 @@ import filecmp import logging import os import shutil +from enum import Enum from pathlib import Path, PurePath -from typing import List, Optional, Set +from typing import Callable, List, Optional, Set from .download_summary import DownloadSummary from .location import Location @@ -19,6 +20,25 @@ LOGGER = logging.getLogger(__name__) PRETTY = PrettyLogger(LOGGER) +class FileConflictResolution(Enum): + """ + The reaction when confronted with a file conflict. + """ + + OVERWRITE_EXISTING = "overwrite" + KEEP_EXISTING = "keep" + DEFAULT = "default" + PROMPT = "prompt" + + +FileConflictResolver = Callable[[PurePath], FileConflictResolution] + + +def resolve_prompt_user(_path: PurePath) -> FileConflictResolution: + """Resolves conflicts by always asking the user.""" + return FileConflictResolution.PROMPT + + class FileAcceptException(Exception): """An exception while accepting a file.""" @@ -26,7 +46,7 @@ class FileAcceptException(Exception): class Organizer(Location): """A helper for managing downloaded files.""" - def __init__(self, path: Path, no_prompt: bool = False): + def __init__(self, path: Path, conflict_resolver: FileConflictResolver = resolve_prompt_user): """Create a new organizer for a given path.""" super().__init__(path) self._known_files: Set[Path] = set() @@ -36,7 +56,7 @@ class Organizer(Location): self.download_summary = DownloadSummary() - self.not_prompting = no_prompt + self.conflict_resolver = conflict_resolver def accept_file(self, src: Path, dst: PurePath) -> Optional[Path]: """ @@ -69,18 +89,14 @@ class Organizer(Location): if self._is_marked(dst): PRETTY.warning(f"File {str(dst_absolute)!r} was already written!") - default_action: bool = False - if self.not_prompting and not default_action \ - or not self.not_prompting and not prompt_yes_no(f"Overwrite file?", default=default_action): + if self._resolve_conflict(f"Overwrite file?", dst_absolute, default=False): 
PRETTY.ignored_file(dst_absolute, "file was written previously") return None # Destination file is directory if dst_absolute.exists() and dst_absolute.is_dir(): - default_action: bool = False - if self.not_prompting and default_action \ - or not self.not_prompting \ - and prompt_yes_no(f"Overwrite folder {dst_absolute} with file?", default=default_action): + prompt = f"Overwrite folder {dst_absolute} with file?" + if self._resolve_conflict(prompt, dst_absolute, default=False): shutil.rmtree(dst_absolute) else: PRETTY.warning(f"Could not add file {str(dst_absolute)!r}") @@ -151,8 +167,20 @@ class Organizer(Location): def _delete_file_if_confirmed(self, path: Path) -> None: prompt = f"Do you want to delete {path}" - default_action: bool = False - if self.not_prompting and default_action or \ - not self.not_prompting and prompt_yes_no(prompt, default_action): + if self._resolve_conflict(prompt, path, default=False): self.download_summary.add_deleted_file(path) path.unlink() + + def _resolve_conflict(self, prompt: str, path: Path, default: bool) -> bool: + if not self.conflict_resolver: + return prompt_yes_no(prompt, default=default) + + result = self.conflict_resolver(path) + if result == FileConflictResolution.DEFAULT: + return default + if result == FileConflictResolution.KEEP_EXISTING: + return False + if result == FileConflictResolution.OVERWRITE_EXISTING: + return True + + return prompt_yes_no(prompt, default=default) diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 57b15f6..12ead8b 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -18,7 +18,7 @@ from .ipd import (IpdCrawler, IpdDownloader, IpdDownloadInfo, IpdDownloadStrategy, ipd_download_new_or_modified) from .location import Location from .logging import PrettyLogger, enable_logging -from .organizer import Organizer +from .organizer import FileConflictResolver, Organizer, resolve_prompt_user from .tmp_dir import TmpDir from .transform import TF, Transform, apply_transform from .utils import PathLike, 
to_path @@ -76,13 +76,13 @@ class Pferd(Location): download_strategy: IliasDownloadStrategy, timeout: int, clean: bool = True, - no_prompt: bool = None + file_conflict_resolver: FileConflictResolver = resolve_prompt_user ) -> Organizer: # pylint: disable=too-many-locals cookie_jar = CookieJar(to_path(cookies) if cookies else None) session = cookie_jar.create_session() tmp_dir = self._tmp_dir.new_subdir() - organizer = Organizer(self.resolve(to_path(target)), no_prompt if no_prompt is not None else False) + organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver) crawler = IliasCrawler(base_url, session, authenticator, dir_filter) downloader = IliasDownloader(tmp_dir, organizer, session, @@ -118,6 +118,7 @@ class Pferd(Location): download_strategy: IliasDownloadStrategy = download_modified_or_new, clean: bool = True, timeout: int = 5, + file_conflict_resolver: FileConflictResolver = resolve_prompt_user ) -> Organizer: """ Synchronizes a folder with the ILIAS instance of the KIT. @@ -145,6 +146,8 @@ class Pferd(Location): clean {bool} -- Whether to clean up when the method finishes. timeout {int} -- The download timeout for opencast videos. Sadly needed due to a requests bug. + file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal + with overwriting or deleting files. The default always asks the user. """ # This authenticator only works with the KIT ilias instance. 
authenticator = KitShibbolethAuthenticator(username=username, password=password) @@ -160,7 +163,8 @@ class Pferd(Location): transform=transform, download_strategy=download_strategy, clean=clean, - timeout=timeout + timeout=timeout, + file_conflict_resolver=file_conflict_resolver ) self._download_summary.merge(organizer.download_summary) @@ -185,6 +189,7 @@ class Pferd(Location): download_strategy: IliasDownloadStrategy = download_modified_or_new, clean: bool = True, timeout: int = 5, + file_conflict_resolver: FileConflictResolver = resolve_prompt_user ) -> Organizer: """ Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS @@ -211,6 +216,8 @@ class Pferd(Location): clean {bool} -- Whether to clean up when the method finishes. timeout {int} -- The download timeout for opencast videos. Sadly needed due to a requests bug. + file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal + with overwriting or deleting files. The default always asks the user. """ # This authenticator only works with the KIT ilias instance. authenticator = KitShibbolethAuthenticator(username=username, password=password) @@ -226,7 +233,8 @@ class Pferd(Location): transform=transform, download_strategy=download_strategy, clean=clean, - timeout=timeout + timeout=timeout, + file_conflict_resolver=file_conflict_resolver ) self._download_summary.merge(organizer.download_summary) @@ -246,7 +254,7 @@ class Pferd(Location): download_strategy: IliasDownloadStrategy = download_modified_or_new, clean: bool = True, timeout: int = 5, - no_prompt: bool = None + file_conflict_resolver: FileConflictResolver = resolve_prompt_user ) -> Organizer: """ Synchronizes a folder with a given folder on the ILIAS instance of the KIT. @@ -273,6 +281,8 @@ class Pferd(Location): clean {bool} -- Whether to clean up when the method finishes. timeout {int} -- The download timeout for opencast videos. Sadly needed due to a requests bug. 
+ file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal + with overwriting or deleting files. The default always asks the user. """ # This authenticator only works with the KIT ilias instance. authenticator = KitShibbolethAuthenticator(username=username, password=password) @@ -292,7 +302,7 @@ class Pferd(Location): download_strategy=download_strategy, clean=clean, timeout=timeout, - no_prompt=no_prompt + file_conflict_resolver=file_conflict_resolver ) self._download_summary.merge(organizer.download_summary) @@ -306,7 +316,8 @@ class Pferd(Location): url: str, transform: Transform = lambda x: x, download_strategy: IpdDownloadStrategy = ipd_download_new_or_modified, - clean: bool = True + clean: bool = True, + file_conflict_resolver: FileConflictResolver = resolve_prompt_user ) -> Organizer: """ Synchronizes a folder with a DIVA playlist. @@ -322,6 +333,8 @@ class Pferd(Location): be downloaded. Can save bandwidth and reduce the number of requests. (default: {diva_download_new}) clean {bool} -- Whether to clean up when the method finishes. + file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal + with overwriting or deleting files. The default always asks the user. """ tmp_dir = self._tmp_dir.new_subdir() @@ -332,7 +345,7 @@ class Pferd(Location): if isinstance(target, Organizer): organizer = target else: - organizer = Organizer(self.resolve(to_path(target))) + organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver) PRETTY.starting_synchronizer(organizer.path, "IPD", url) @@ -360,7 +373,8 @@ class Pferd(Location): playlist_location: str, transform: Transform = lambda x: x, download_strategy: DivaDownloadStrategy = diva_download_new, - clean: bool = True + clean: bool = True, + file_conflict_resolver: FileConflictResolver = resolve_prompt_user ) -> Organizer: """ Synchronizes a folder with a DIVA playlist. @@ -377,6 +391,8 @@ class Pferd(Location): be downloaded. 
Can save bandwidth and reduce the number of requests. (default: {diva_download_new}) clean {bool} -- Whether to clean up when the method finishes. + file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal + with overwriting or deleting files. The default always asks the user. """ tmp_dir = self._tmp_dir.new_subdir() @@ -392,7 +408,7 @@ class Pferd(Location): if isinstance(target, Organizer): organizer = target else: - organizer = Organizer(self.resolve(to_path(target))) + organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver) PRETTY.starting_synchronizer(organizer.path, "DIVA", playlist_id) diff --git a/sync_url.py b/sync_url.py index 14c2c9e..e06deb6 100755 --- a/sync_url.py +++ b/sync_url.py @@ -5,24 +5,35 @@ A simple script to download a course by name from ILIAS. """ import argparse -from pathlib import Path +from pathlib import Path, PurePath from urllib.parse import urlparse from PFERD import Pferd from PFERD.cookie_jar import CookieJar from PFERD.ilias import (IliasCrawler, IliasElementType, KitShibbolethAuthenticator) +from PFERD.organizer import FileConflictResolution, resolve_prompt_user from PFERD.transform import sanitize_windows_path from PFERD.utils import to_path +def _resolve_overwrite(_path: PurePath) -> FileConflictResolution: + return FileConflictResolution.OVERWRITE_EXISTING + + +def _resolve_default(_path: PurePath) -> FileConflictResolution: + return FileConflictResolution.DEFAULT + + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--test-run", action="store_true") parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in") parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") - parser.add_argument('-p', '--passive', action="store_true", + parser.add_argument('-d', '--default', action="store_true", help="Don't prompt for confirmations and use sane defaults") + parser.add_argument('-r', 
'--remove', action="store_true", + help="Remove and overwrite files without prompting for confirmation") parser.add_argument('url', help="URL to the course page") parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into") args = parser.parse_args() @@ -39,13 +50,17 @@ def main() -> None: folder = Path(args.folder) if args.folder is None: - folder = Path(crawler.find_element_name(args.url)) + element_name = crawler.find_element_name(args.url) + if not element_name: + print("Error, could not get element name. Please specify a folder yourself.") + return + folder = Path(element_name) cookie_jar.save_cookies() # files may not escape the pferd_root with relative paths # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path pferd_root = Path(Path.cwd(), Path(folder)).parent - folder = folder.name + target = folder.name pferd = Pferd(pferd_root, test_run=args.test_run) def dir_filter(_: Path, element: IliasElementType) -> bool: @@ -53,15 +68,22 @@ def main() -> None: return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER] return True + if args.default: + file_confilict_resolver = _resolve_default + elif args.remove: + file_confilict_resolver = _resolve_overwrite + else: + file_confilict_resolver = resolve_prompt_user + pferd.enable_logging() # fetch pferd.ilias_kit_folder( - target=folder, + target=target, full_url=args.url, cookies=args.cookies, dir_filter=dir_filter, transform=sanitize_windows_path, - no_prompt=args.passive + file_conflict_resolver=file_confilict_resolver ) From fcb3884a8fcd0c8cef6f79b0083c49b45afb4ff9 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 5 Dec 2020 13:47:53 +0100 Subject: [PATCH 027/524] Add --remote-first, --local-first and --no-delete flags --- PFERD/organizer.py | 40 ++++++++++++++++++++++++++++++---------- sync_url.py | 39 ++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/PFERD/organizer.py 
b/PFERD/organizer.py index f63e92a..a41d0d2 100644 --- a/PFERD/organizer.py +++ b/PFERD/organizer.py @@ -20,21 +20,37 @@ LOGGER = logging.getLogger(__name__) PRETTY = PrettyLogger(LOGGER) +class ConflictType(Enum): + """ + The type of the conflict. A file might not exist anymore and will be deleted + or it might be overwritten with a newer version. + """ + FILE_OVERWRITTEN = "overwritten" + FILE_DELETED = "deleted" + + class FileConflictResolution(Enum): """ - The reaction when confronted with a file conflict. + The reaction when confronted with a file conflict: """ - OVERWRITE_EXISTING = "overwrite" + DESTROY_EXISTING = "destroy" + """Delete/overwrite the current file""" + KEEP_EXISTING = "keep" + """Keep the current file""" + DEFAULT = "default" + """Do whatever the PFERD authors thought is sensible""" + PROMPT = "prompt" + """Interactively ask the user""" -FileConflictResolver = Callable[[PurePath], FileConflictResolution] +FileConflictResolver = Callable[[PurePath, ConflictType], FileConflictResolution] -def resolve_prompt_user(_path: PurePath) -> FileConflictResolution: +def resolve_prompt_user(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution: """Resolves conflicts by always asking the user.""" return FileConflictResolution.PROMPT @@ -89,14 +105,16 @@ class Organizer(Location): if self._is_marked(dst): PRETTY.warning(f"File {str(dst_absolute)!r} was already written!") - if self._resolve_conflict(f"Overwrite file?", dst_absolute, default=False): + conflict = ConflictType.FILE_OVERWRITTEN + if self._resolve_conflict(f"Overwrite file?", dst_absolute, conflict, default=False): PRETTY.ignored_file(dst_absolute, "file was written previously") return None # Destination file is directory if dst_absolute.exists() and dst_absolute.is_dir(): prompt = f"Overwrite folder {dst_absolute} with file?" 
- if self._resolve_conflict(prompt, dst_absolute, default=False): + conflict = ConflictType.FILE_OVERWRITTEN + if self._resolve_conflict(prompt, dst_absolute, conflict, default=False): shutil.rmtree(dst_absolute) else: PRETTY.warning(f"Could not add file {str(dst_absolute)!r}") @@ -167,20 +185,22 @@ class Organizer(Location): def _delete_file_if_confirmed(self, path: Path) -> None: prompt = f"Do you want to delete {path}" - if self._resolve_conflict(prompt, path, default=False): + if self._resolve_conflict(prompt, path, ConflictType.FILE_DELETED, default=False): self.download_summary.add_deleted_file(path) path.unlink() - def _resolve_conflict(self, prompt: str, path: Path, default: bool) -> bool: + def _resolve_conflict( + self, prompt: str, path: Path, conflict: ConflictType, default: bool + ) -> bool: if not self.conflict_resolver: return prompt_yes_no(prompt, default=default) - result = self.conflict_resolver(path) + result = self.conflict_resolver(path, conflict) if result == FileConflictResolution.DEFAULT: return default if result == FileConflictResolution.KEEP_EXISTING: return False - if result == FileConflictResolution.OVERWRITE_EXISTING: + if result == FileConflictResolution.DESTROY_EXISTING: return True return prompt_yes_no(prompt, default=default) diff --git a/sync_url.py b/sync_url.py index e06deb6..91a7521 100755 --- a/sync_url.py +++ b/sync_url.py @@ -12,17 +12,26 @@ from PFERD import Pferd from PFERD.cookie_jar import CookieJar from PFERD.ilias import (IliasCrawler, IliasElementType, KitShibbolethAuthenticator) -from PFERD.organizer import FileConflictResolution, resolve_prompt_user +from PFERD.organizer import (ConflictType, FileConflictResolution, + FileConflictResolver, resolve_prompt_user) from PFERD.transform import sanitize_windows_path from PFERD.utils import to_path -def _resolve_overwrite(_path: PurePath) -> FileConflictResolution: - return FileConflictResolution.OVERWRITE_EXISTING +def _resolve_remote_first(_path: PurePath, _conflict: 
ConflictType) -> FileConflictResolution: + return FileConflictResolution.DESTROY_EXISTING -def _resolve_default(_path: PurePath) -> FileConflictResolution: - return FileConflictResolution.DEFAULT +def _resolve_local_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution: + return FileConflictResolution.KEEP_EXISTING + + +def _resolve_no_delete(_path: PurePath, conflict: ConflictType) -> FileConflictResolution: + # Update files + if conflict == ConflictType.FILE_OVERWRITTEN: + return FileConflictResolution.DESTROY_EXISTING + # But do not delete them + return FileConflictResolution.KEEP_EXISTING def main() -> None: @@ -30,10 +39,12 @@ def main() -> None: parser.add_argument("--test-run", action="store_true") parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in") parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") - parser.add_argument('-d', '--default', action="store_true", - help="Don't prompt for confirmations and use sane defaults") - parser.add_argument('-r', '--remove', action="store_true", - help="Remove and overwrite files without prompting for confirmation") + parser.add_argument('--local-first', action="store_true", + help="Don't prompt for confirmation, keep existing files") + parser.add_argument('--remote-first', action="store_true", + help="Don't prompt for confirmation, delete and overwrite local files") + parser.add_argument('--no-delete', action="store_true", + help="Don't prompt for confirmation, overwrite local files, don't delete") parser.add_argument('url', help="URL to the course page") parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into") args = parser.parse_args() @@ -68,10 +79,12 @@ def main() -> None: return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER] return True - if args.default: - file_confilict_resolver = _resolve_default - elif args.remove: - file_confilict_resolver = 
_resolve_overwrite + if args.remote_first: + file_confilict_resolver: FileConflictResolver = _resolve_remote_first + elif args.local_first: + file_confilict_resolver = _resolve_local_first + elif args.no_delete: + file_confilict_resolver = _resolve_no_delete else: file_confilict_resolver = resolve_prompt_user From 2d644095429e39b6b307c50a7a22fb0e0a1ee1eb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 5 Dec 2020 13:50:46 +0100 Subject: [PATCH 028/524] Fix handling of empty args.folder --- sync_url.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sync_url.py b/sync_url.py index 91a7521..97c0c81 100755 --- a/sync_url.py +++ b/sync_url.py @@ -59,7 +59,6 @@ def main() -> None: cookie_jar.load_cookies() - folder = Path(args.folder) if args.folder is None: element_name = crawler.find_element_name(args.url) if not element_name: @@ -67,6 +66,8 @@ def main() -> None: return folder = Path(element_name) cookie_jar.save_cookies() + else: + folder = Path(args.folder) # files may not escape the pferd_root with relative paths # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path From 4ce385b262daa1064002d000ea75ea9f705c151e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 5 Dec 2020 14:03:43 +0100 Subject: [PATCH 029/524] Treat file overwrite and marked file overwrite differently --- PFERD/organizer.py | 29 ++++++++++++++++++++++------- sync_url.py | 2 ++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/PFERD/organizer.py b/PFERD/organizer.py index a41d0d2..1038ae7 100644 --- a/PFERD/organizer.py +++ b/PFERD/organizer.py @@ -24,34 +24,44 @@ class ConflictType(Enum): """ The type of the conflict. A file might not exist anymore and will be deleted or it might be overwritten with a newer version. 
+ + FILE_OVERWRITTEN: An existing file will be updated + MARKED_FILE_OVERWRITTEN: A file is written for the second+ time in this run + FILE_DELETED: The file was deleted """ FILE_OVERWRITTEN = "overwritten" + MARKED_FILE_OVERWRITTEN = "marked_file_overwritten" FILE_DELETED = "deleted" class FileConflictResolution(Enum): """ The reaction when confronted with a file conflict: + + DESTROY_EXISTING: Delete/overwrite the current file + KEEP_EXISTING: Keep the current file + DEFAULT: Do whatever the PFERD authors thought is sensible + PROMPT: Interactively ask the user """ DESTROY_EXISTING = "destroy" - """Delete/overwrite the current file""" KEEP_EXISTING = "keep" - """Keep the current file""" DEFAULT = "default" - """Do whatever the PFERD authors thought is sensible""" PROMPT = "prompt" - """Interactively ask the user""" FileConflictResolver = Callable[[PurePath, ConflictType], FileConflictResolution] -def resolve_prompt_user(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution: - """Resolves conflicts by always asking the user.""" +def resolve_prompt_user(_path: PurePath, conflict: ConflictType) -> FileConflictResolution: + """ + Resolves conflicts by asking the user if a file was written twice or will be deleted. + """ + if conflict == ConflictType.FILE_OVERWRITTEN: + return FileConflictResolution.DESTROY_EXISTING return FileConflictResolution.PROMPT @@ -105,7 +115,7 @@ class Organizer(Location): if self._is_marked(dst): PRETTY.warning(f"File {str(dst_absolute)!r} was already written!") - conflict = ConflictType.FILE_OVERWRITTEN + conflict = ConflictType.MARKED_FILE_OVERWRITTEN if self._resolve_conflict(f"Overwrite file?", dst_absolute, conflict, default=False): PRETTY.ignored_file(dst_absolute, "file was written previously") return None @@ -128,6 +138,11 @@ class Organizer(Location): self.mark(dst) return dst_absolute + prompt = f"Overwrite file {dst_absolute}?" 
+ conflict = ConflictType.FILE_OVERWRITTEN + if not self._resolve_conflict(prompt, dst_absolute, conflict, default=True): + return None + self.download_summary.add_modified_file(dst_absolute) PRETTY.modified_file(dst_absolute) else: diff --git a/sync_url.py b/sync_url.py index 97c0c81..c6231e4 100755 --- a/sync_url.py +++ b/sync_url.py @@ -30,6 +30,8 @@ def _resolve_no_delete(_path: PurePath, conflict: ConflictType) -> FileConflictR # Update files if conflict == ConflictType.FILE_OVERWRITTEN: return FileConflictResolution.DESTROY_EXISTING + if conflict == ConflictType.MARKED_FILE_OVERWRITTEN: + return FileConflictResolution.DESTROY_EXISTING # But do not delete them return FileConflictResolution.KEEP_EXISTING From 57259e21f462b56826acf4b041d9005be4c26365 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 5 Dec 2020 14:08:00 +0100 Subject: [PATCH 030/524] Print download summary in sync_url --- sync_url.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sync_url.py b/sync_url.py index c6231e4..6629a18 100755 --- a/sync_url.py +++ b/sync_url.py @@ -102,6 +102,8 @@ def main() -> None: file_conflict_resolver=file_confilict_resolver ) + pferd.print_summary() + if __name__ == "__main__": main() From 0f5e55648be99b73fbe349ecc6a97b110d8dbe66 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 5 Dec 2020 14:11:51 +0100 Subject: [PATCH 031/524] Tell user when the conflict resolver kept existing files --- PFERD/organizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PFERD/organizer.py b/PFERD/organizer.py index 1038ae7..a15e751 100644 --- a/PFERD/organizer.py +++ b/PFERD/organizer.py @@ -141,6 +141,7 @@ class Organizer(Location): prompt = f"Overwrite file {dst_absolute}?" 
conflict = ConflictType.FILE_OVERWRITTEN if not self._resolve_conflict(prompt, dst_absolute, conflict, default=True): + PRETTY.ignored_file(dst_absolute, "user conflict resolution") return None self.download_summary.add_modified_file(dst_absolute) @@ -203,6 +204,8 @@ class Organizer(Location): if self._resolve_conflict(prompt, path, ConflictType.FILE_DELETED, default=False): self.download_summary.add_deleted_file(path) path.unlink() + else: + PRETTY.ignored_file(path, "user conflict resolution") def _resolve_conflict( self, prompt: str, path: Path, conflict: ConflictType, default: bool From 1e0343bba667ca47e7470e5d2d5aca84d832bf9a Mon Sep 17 00:00:00 2001 From: Lucas <24826124+Luro02@users.noreply.github.com> Date: Tue, 3 Nov 2020 13:38:33 +0100 Subject: [PATCH 032/524] sync_url: Add username and password args --- sync_url.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sync_url.py b/sync_url.py index 6629a18..e4c4c9a 100755 --- a/sync_url.py +++ b/sync_url.py @@ -40,6 +40,8 @@ def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--test-run", action="store_true") parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in") + parser.add_argument('-u', '--username', nargs='?', default=None, help="Username for Ilias") + parser.add_argument('-p', '--password', nargs='?', default=None, help="Password for Ilias") parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") parser.add_argument('--local-first', action="store_true", help="Don't prompt for confirmation, keep existing files") @@ -55,7 +57,7 @@ def main() -> None: cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None) session = cookie_jar.create_session() - authenticator = KitShibbolethAuthenticator() + authenticator = KitShibbolethAuthenticator(username=args.username, password=args.password) crawler = IliasCrawler(url.scheme + '://' + url.netloc, session, authenticator, lambda x, 
y: True) @@ -98,6 +100,8 @@ def main() -> None: full_url=args.url, cookies=args.cookies, dir_filter=dir_filter, + username=args.username, + password=args.password, transform=sanitize_windows_path, file_conflict_resolver=file_confilict_resolver ) From 75471c46d1a62447c38763517479cb31be4949da Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 Nov 2020 21:18:48 +0100 Subject: [PATCH 033/524] Use credential file --- sync_url.py | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/sync_url.py b/sync_url.py index e4c4c9a..beba144 100755 --- a/sync_url.py +++ b/sync_url.py @@ -5,18 +5,42 @@ A simple script to download a course by name from ILIAS. """ import argparse +import logging +import sys from pathlib import Path, PurePath +from typing import Optional, Tuple from urllib.parse import urlparse from PFERD import Pferd from PFERD.cookie_jar import CookieJar from PFERD.ilias import (IliasCrawler, IliasElementType, KitShibbolethAuthenticator) +from PFERD.logging import PrettyLogger, enable_logging from PFERD.organizer import (ConflictType, FileConflictResolution, FileConflictResolver, resolve_prompt_user) from PFERD.transform import sanitize_windows_path from PFERD.utils import to_path +_LOGGER = logging.getLogger("sync_url") +_PRETTY = PrettyLogger(_LOGGER) + + +def _extract_credentials(file_path: Optional[str]) -> Tuple[Optional[str], Optional[str]]: + if not file_path: + return (None, None) + + if not Path(file_path).exists(): + _PRETTY.error("Credential file does not exist") + sys.exit(1) + + with open(file_path, "r") as file: + first_line = file.read().splitlines()[0] + read_name, *read_password = first_line.split(":", 1) + + name = read_name if read_name else None + password = read_password[0] if read_password else None + return (name, password) + def _resolve_remote_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution: return FileConflictResolution.DESTROY_EXISTING @@ -37,11 
+61,16 @@ def _resolve_no_delete(_path: PurePath, conflict: ConflictType) -> FileConflictR def main() -> None: + enable_logging(name="sync_url") + parser = argparse.ArgumentParser() parser.add_argument("--test-run", action="store_true") parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in") parser.add_argument('-u', '--username', nargs='?', default=None, help="Username for Ilias") parser.add_argument('-p', '--password', nargs='?', default=None, help="Password for Ilias") + parser.add_argument('--credential-file', nargs='?', default=None, + help="Path to a file containing credentials for Ilias. The file must have " + "one line in the following format: ':'") parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") parser.add_argument('--local-first', action="store_true", help="Don't prompt for confirmation, keep existing files") @@ -53,11 +82,13 @@ def main() -> None: parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into") args = parser.parse_args() - url = urlparse(args.url) - cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None) session = cookie_jar.create_session() - authenticator = KitShibbolethAuthenticator(username=args.username, password=args.password) + + username, password = _extract_credentials(args.credential_file) + authenticator = KitShibbolethAuthenticator(username=username, password=password) + + url = urlparse(args.url) crawler = IliasCrawler(url.scheme + '://' + url.netloc, session, authenticator, lambda x, y: True) @@ -100,10 +131,10 @@ def main() -> None: full_url=args.url, cookies=args.cookies, dir_filter=dir_filter, - username=args.username, - password=args.password, - transform=sanitize_windows_path, - file_conflict_resolver=file_confilict_resolver + username=username, + password=password, + file_conflict_resolver=file_confilict_resolver, + transform=sanitize_windows_path ) pferd.print_summary() From 
83ea15ee83e42cbdbf23a0a56e82d142c38ff636 Mon Sep 17 00:00:00 2001 From: Scriptim Date: Wed, 4 Nov 2020 00:18:27 +0100 Subject: [PATCH 034/524] Use system keyring service for password auth --- LICENSE | 4 ++ PFERD/authenticators.py | 89 +++++++++++++++++++++++++++++++++++ PFERD/ilias/__init__.py | 3 +- PFERD/ilias/authenticators.py | 8 +++- requirements.txt | 1 + setup.py | 3 +- sync_url.py | 27 ++++++++--- 7 files changed, 125 insertions(+), 10 deletions(-) diff --git a/LICENSE b/LICENSE index 26bcc0a..7e4f54e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,8 @@ +<<<<<<< HEAD Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe +======= +Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, Scriptim +>>>>>>> f89226c (Use system keyring service for password auth) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/authenticators.py b/PFERD/authenticators.py index b8cfe28..5537cc1 100644 --- a/PFERD/authenticators.py +++ b/PFERD/authenticators.py @@ -3,8 +3,19 @@ General authenticators useful in many situations """ import getpass +import logging from typing import Optional, Tuple +from .logging import PrettyLogger + +LOGGER = logging.getLogger(__name__) +PRETTY = PrettyLogger(LOGGER) + +try: + import keyring +except ImportError: + PRETTY.warning("Keyring module not found, KeyringAuthenticator won't work!") + class TfaAuthenticator: # pylint: disable=too-few-public-methods @@ -123,3 +134,81 @@ class UserPassAuthenticator: if self._given_username is not None and self._given_password is not None: self._given_username = None self._given_password = None + + +class KeyringAuthenticator(UserPassAuthenticator): + """ + An authenticator for username-password combinations that stores the + password using the system keyring service and prompts the user for missing + information. 
+ """ + + def get_credentials(self) -> Tuple[str, str]: + """ + Returns a tuple (username, password). Prompts user for username or + password when necessary. + """ + + if self._username is None and self._given_username is not None: + self._username = self._given_username + + if self._password is None and self._given_password is not None: + self._password = self._given_password + + if self._username is not None and self._password is None: + self._load_password() + + if self._username is None or self._password is None: + print(f"Enter credentials ({self._reason})") + + username: str + if self._username is None: + username = input("Username: ") + self._username = username + else: + username = self._username + + if self._password is None: + self._load_password() + + password: str + if self._password is None: + password = getpass.getpass(prompt="Password: ") + self._password = password + self._save_password() + else: + password = self._password + + return (username, password) + + def _load_password(self) -> None: + """ + Loads the saved password associated with self._username from the system + keyring service (or None if not password has been saved yet) and stores + it in self._password. + """ + self._password = keyring.get_password("pferd-ilias", self._username) + + def _save_password(self) -> None: + """ + Saves self._password to the system keyring service and associates it + with self._username. + """ + keyring.set_password("pferd-ilias", self._username, self._password) + + def invalidate_credentials(self) -> None: + """ + Marks the credentials as invalid. If only a username was supplied in + the constructor, assumes that the username is valid and only the + password is invalid. If only a password was supplied in the + constructor, assumes that the password is valid and only the username + is invalid. Otherwise, assumes that username and password are both + invalid. 
+ """ + + try: + keyring.delete_password("pferd-ilias", self._username) + except keyring.errors.PasswordDeleteError: + pass + + super().invalidate_credentials() diff --git a/PFERD/ilias/__init__.py b/PFERD/ilias/__init__.py index 0a5f08b..379d244 100644 --- a/PFERD/ilias/__init__.py +++ b/PFERD/ilias/__init__.py @@ -2,7 +2,8 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/). """ -from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator +from .authenticators import (IliasAuthenticator, KitShibbolethAuthenticator, + KeyringKitShibbolethAuthenticator) from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter, IliasElementType) from .downloader import (IliasDownloader, IliasDownloadInfo, diff --git a/PFERD/ilias/authenticators.py b/PFERD/ilias/authenticators.py index 763ed38..e70f459 100644 --- a/PFERD/ilias/authenticators.py +++ b/PFERD/ilias/authenticators.py @@ -37,8 +37,12 @@ class KitShibbolethAuthenticator(IliasAuthenticator): Authenticate via KIT's shibboleth system. 
""" - def __init__(self, username: Optional[str] = None, password: Optional[str] = None) -> None: - self._auth = UserPassAuthenticator("KIT ILIAS Shibboleth", username, password) + def __init__(self, authenticator: Optional[UserPassAuthenticator] = None) -> None: + if authenticator: + self._auth = authenticator + else: + self._auth = UserPassAuthenticator("KIT ILIAS Shibboleth") + self._tfa_auth = TfaAuthenticator("KIT ILIAS Shibboleth") def authenticate(self, sess: requests.Session) -> None: diff --git a/requirements.txt b/requirements.txt index f851c23..2d852e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ requests>=2.21.0 beautifulsoup4>=4.7.1 rich>=2.1.0 +keyring>=21.5.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 9b226f8..6650016 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,8 @@ setup( install_requires=[ "requests>=2.21.0", "beautifulsoup4>=4.7.1", - "rich>=2.1.0" + "rich>=2.1.0", + "keyring>=21.5.0" ], ) diff --git a/sync_url.py b/sync_url.py index beba144..fe0b3c4 100755 --- a/sync_url.py +++ b/sync_url.py @@ -8,10 +8,11 @@ import argparse import logging import sys from pathlib import Path, PurePath -from typing import Optional, Tuple +from typing import Optional from urllib.parse import urlparse from PFERD import Pferd +from PFERD.authenticators import KeyringAuthenticator, UserPassAuthenticator from PFERD.cookie_jar import CookieJar from PFERD.ilias import (IliasCrawler, IliasElementType, KitShibbolethAuthenticator) @@ -25,9 +26,9 @@ _LOGGER = logging.getLogger("sync_url") _PRETTY = PrettyLogger(_LOGGER) -def _extract_credentials(file_path: Optional[str]) -> Tuple[Optional[str], Optional[str]]: +def _extract_credentials(file_path: Optional[str]) -> UserPassAuthenticator: if not file_path: - return (None, None) + return UserPassAuthenticator("KIT ILIAS Shibboleth", None, None) if not Path(file_path).exists(): _PRETTY.error("Credential file does not exist") @@ -39,7 +40,7 @@ def _extract_credentials(file_path: 
Optional[str]) -> Tuple[Optional[str], Optio name = read_name if read_name else None password = read_password[0] if read_password else None - return (name, password) + return UserPassAuthenticator("KIT ILIAS Shibboleth", username=name, password=password) def _resolve_remote_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution: @@ -71,6 +72,8 @@ def main() -> None: parser.add_argument('--credential-file', nargs='?', default=None, help="Path to a file containing credentials for Ilias. The file must have " "one line in the following format: ':'") + parser.add_argument("-k", "--keyring", action="store_true", + help="Use the system keyring service for authentication") parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") parser.add_argument('--local-first', action="store_true", help="Don't prompt for confirmation, keep existing files") @@ -85,10 +88,21 @@ def main() -> None: cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None) session = cookie_jar.create_session() - username, password = _extract_credentials(args.credential_file) - authenticator = KitShibbolethAuthenticator(username=username, password=password) + if args.keyring: + if not args.username: + _PRETTY.error("Keyring auth selected but no --username passed!") + return + inner_auth: UserPassAuthenticator = KeyringAuthenticator( + "KIT ILIAS Shibboleth", username=args.username, password=args.password + ) + else: + inner_auth = _extract_credentials(args.credential_file) + + username, password = inner_auth.get_credentials() + authenticator = KitShibbolethAuthenticator(inner_auth) url = urlparse(args.url) + crawler = IliasCrawler(url.scheme + '://' + url.netloc, session, authenticator, lambda x, y: True) @@ -125,6 +139,7 @@ def main() -> None: file_confilict_resolver = resolve_prompt_user pferd.enable_logging() + # fetch pferd.ilias_kit_folder( target=target, From f47b137b593628e45d34f9674342c039532329e0 Mon Sep 17 00:00:00 2001 From: 
I-Al-Istannen Date: Sat, 5 Dec 2020 23:35:20 +0100 Subject: [PATCH 035/524] Fix ILIAS init.py and Pferd.py authenticators --- PFERD/ilias/__init__.py | 3 +-- PFERD/pferd.py | 14 +++++++++++--- mypy.ini | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/PFERD/ilias/__init__.py b/PFERD/ilias/__init__.py index 379d244..0a5f08b 100644 --- a/PFERD/ilias/__init__.py +++ b/PFERD/ilias/__init__.py @@ -2,8 +2,7 @@ Synchronizing files from ILIAS instances (https://www.ilias.de/). """ -from .authenticators import (IliasAuthenticator, KitShibbolethAuthenticator, - KeyringKitShibbolethAuthenticator) +from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter, IliasElementType) from .downloader import (IliasDownloader, IliasDownloadInfo, diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 12ead8b..1bb6f78 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -6,6 +6,7 @@ import logging from pathlib import Path from typing import Callable, List, Optional, Union +from .authenticators import UserPassAuthenticator from .cookie_jar import CookieJar from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler, diva_download_new) @@ -64,6 +65,13 @@ class Pferd(Location): for transformable in transformables: LOGGER.info(transformable.path) + @staticmethod + def _get_authenticator( + username: Optional[str], password: Optional[str] + ) -> KitShibbolethAuthenticator: + inner_auth = UserPassAuthenticator("ILIAS - Pferd.py", username, password) + return KitShibbolethAuthenticator(inner_auth) + def _ilias( self, target: PathLike, @@ -150,7 +158,7 @@ class Pferd(Location): with overwriting or deleting files. The default always asks the user. """ # This authenticator only works with the KIT ilias instance. 
- authenticator = KitShibbolethAuthenticator(username=username, password=password) + authenticator = Pferd._get_authenticator(username=username, password=password) PRETTY.starting_synchronizer(target, "ILIAS", course_id) organizer = self._ilias( @@ -220,7 +228,7 @@ class Pferd(Location): with overwriting or deleting files. The default always asks the user. """ # This authenticator only works with the KIT ilias instance. - authenticator = KitShibbolethAuthenticator(username=username, password=password) + authenticator = Pferd._get_authenticator(username, password) PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop") organizer = self._ilias( @@ -285,7 +293,7 @@ class Pferd(Location): with overwriting or deleting files. The default always asks the user. """ # This authenticator only works with the KIT ilias instance. - authenticator = KitShibbolethAuthenticator(username=username, password=password) + authenticator = Pferd._get_authenticator(username=username, password=password) PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url") if not full_url.startswith("https://ilias.studium.kit.edu"): diff --git a/mypy.ini b/mypy.ini index 91792d8..60306de 100644 --- a/mypy.ini +++ b/mypy.ini @@ -3,5 +3,5 @@ disallow_untyped_defs = True disallow_incomplete_defs = True no_implicit_optional = True -[mypy-rich.*,bs4] +[mypy-rich.*,bs4,keyring] ignore_missing_imports = True From 93e6329901b83b80ccc2ec339431d2bf2e0d07f6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 6 Dec 2020 13:28:08 +0100 Subject: [PATCH 036/524] Use the least destructive conflict resolver if there are multiple --- sync_url.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sync_url.py b/sync_url.py index fe0b3c4..c6db255 100755 --- a/sync_url.py +++ b/sync_url.py @@ -129,12 +129,12 @@ def main() -> None: return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER] return True - if args.remote_first: - file_confilict_resolver: 
FileConflictResolver = _resolve_remote_first - elif args.local_first: - file_confilict_resolver = _resolve_local_first + if args.local_first: + file_confilict_resolver: FileConflictResolver = _resolve_local_first elif args.no_delete: file_confilict_resolver = _resolve_no_delete + elif args.remote_first: + file_confilict_resolver = _resolve_remote_first else: file_confilict_resolver = resolve_prompt_user From ee39aaf08b7e031cb8d1ed5fb675c672419162dc Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 7 Dec 2020 22:55:28 +0100 Subject: [PATCH 037/524] Fix merge marker in LICENSE --- LICENSE | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/LICENSE b/LICENSE index 7e4f54e..01f15f5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,8 +1,4 @@ -<<<<<<< HEAD -Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe -======= -Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, Scriptim ->>>>>>> f89226c (Use system keyring service for password auth) +Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -19,4 +15,4 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
From 1c2b6bf9946e31914ba41421d33bfa83c03258c6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 13 Dec 2020 19:57:29 +0100 Subject: [PATCH 038/524] Bump version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3a877c1..572528a 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5 +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.0 ``` The use of [venv] is recommended. @@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.4.5 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.4.5/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.0 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.0/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index 6650016..1ed2876 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.4.5", + version="2.5.0", packages=find_packages(), install_requires=[ "requests>=2.21.0", From 9b048a9cfc43a97c7db696019ce792ed35a8a6d1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 30 Dec 2020 14:32:59 +0100 Subject: [PATCH 039/524] Canonize meeting names to a properly formatted date --- PFERD/ilias/crawler.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index 2e37e36..4d59dbf 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -40,6 +40,7 @@ class IliasElementType(Enum): REGULAR_FILE = "REGULAR_FILE" VIDEO_FILE = "VIDEO_FILE" FORUM = "FORUM" 
+ MEETING = "MEETING" EXTERNAL_LINK = "EXTERNAL_LINK" def is_folder(self) -> bool: @@ -241,6 +242,8 @@ class IliasCrawler: entries_to_process += self._crawl_video_directory(entry.path, url) continue + PRETTY.warning(f"Unknown type: {entry.entry_type}!") + return result def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]: @@ -269,6 +272,19 @@ class IliasCrawler: if element_type == IliasElementType.REGULAR_FILE: result += self._crawl_file(folder_path, link, abs_url) + elif element_type == IliasElementType.MEETING: + meeting_name = str(element_path.name) + date_portion_str = meeting_name.split(" - ")[0] + date_portion = demangle_date(date_portion_str) + + if not date_portion: + result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)] + continue + + rest_of_name = meeting_name.removeprefix(date_portion_str) + new_name = datetime.datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") + rest_of_name + new_path = Path(folder_path, _sanitize_path_name(new_name)) + result += [IliasCrawlerEntry(new_path, abs_url, IliasElementType.REGULAR_FOLDER, None)] elif element_type is not None: result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)] else: @@ -320,6 +336,8 @@ class IliasCrawler: """ # pylint: disable=too-many-return-statements + found_parent: Optional[bs4.Tag] = None + # We look for the outer div of our inner link, to find information around it # (mostly the icon) for parent in link_element.parents: @@ -350,6 +368,9 @@ class IliasCrawler: if str(img_tag["src"]).endswith("frm.svg"): return IliasElementType.FORUM + if str(img_tag["src"]).endswith("sess.svg"): + return IliasElementType.MEETING + return IliasElementType.REGULAR_FOLDER @staticmethod From 2714ac6be6881e7a49e59d6aa8c709700720e8e8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 30 Dec 2020 14:34:11 +0100 Subject: [PATCH 040/524] Send CSRF token to Shibboleth --- PFERD/ilias/authenticators.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/PFERD/ilias/authenticators.py b/PFERD/ilias/authenticators.py index e70f459..4b99dd8 100644 --- a/PFERD/ilias/authenticators.py +++ b/PFERD/ilias/authenticators.py @@ -74,6 +74,8 @@ class KitShibbolethAuthenticator(IliasAuthenticator): form = soup.find("form", {"class": "full content", "method": "post"}) action = form["action"] + csrf_token = form.find("input", {"name": "csrf_token"})["value"] + # Equivalent: Enter credentials in # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO LOGGER.debug("Attempt to log in to Shibboleth using credentials") @@ -82,6 +84,7 @@ class KitShibbolethAuthenticator(IliasAuthenticator): "_eventId_proceed": "", "j_username": self._auth.username, "j_password": self._auth.password, + "csrf_token": csrf_token } soup = soupify(sess.post(url, data=data)) From c978e9edf462d6aafd71acb29f714c8a677c2fb5 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 30 Dec 2020 14:45:46 +0100 Subject: [PATCH 041/524] Resolve a few pylint warnings --- PFERD/authenticators.py | 2 +- PFERD/ilias/crawler.py | 7 +++++-- PFERD/logging.py | 5 +---- PFERD/organizer.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/PFERD/authenticators.py b/PFERD/authenticators.py index 5537cc1..f85c9d3 100644 --- a/PFERD/authenticators.py +++ b/PFERD/authenticators.py @@ -14,7 +14,7 @@ PRETTY = PrettyLogger(LOGGER) try: import keyring except ImportError: - PRETTY.warning("Keyring module not found, KeyringAuthenticator won't work!") + pass class TfaAuthenticator: diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index 4d59dbf..86bf045 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -282,9 +282,12 @@ class IliasCrawler: continue rest_of_name = meeting_name.removeprefix(date_portion_str) - new_name = datetime.datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") + rest_of_name + new_name = datetime.datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") \ + + rest_of_name new_path = Path(folder_path, 
_sanitize_path_name(new_name)) - result += [IliasCrawlerEntry(new_path, abs_url, IliasElementType.REGULAR_FOLDER, None)] + result += [ + IliasCrawlerEntry(new_path, abs_url, IliasElementType.REGULAR_FOLDER, None) + ] elif element_type is not None: result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)] else: diff --git a/PFERD/logging.py b/PFERD/logging.py index 76741f7..c25019e 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -3,13 +3,10 @@ Contains a few logger utility functions and implementations. """ import logging -from pathlib import Path -from typing import List, Optional +from typing import Optional -from rich import print as rich_print from rich._log_render import LogRender from rich.console import Console -from rich.panel import Panel from rich.style import Style from rich.text import Text from rich.theme import Theme diff --git a/PFERD/organizer.py b/PFERD/organizer.py index a15e751..fe5052b 100644 --- a/PFERD/organizer.py +++ b/PFERD/organizer.py @@ -116,7 +116,7 @@ class Organizer(Location): if self._is_marked(dst): PRETTY.warning(f"File {str(dst_absolute)!r} was already written!") conflict = ConflictType.MARKED_FILE_OVERWRITTEN - if self._resolve_conflict(f"Overwrite file?", dst_absolute, conflict, default=False): + if self._resolve_conflict("Overwrite file?", dst_absolute, conflict, default=False): PRETTY.ignored_file(dst_absolute, "file was written previously") return None From 0e1077bb50618ff144f7aab463448b0fb9f4d770 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 30 Dec 2020 14:50:49 +0100 Subject: [PATCH 042/524] Bump version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 572528a..59aaaa2 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. 
To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.0 +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.1 ``` The use of [venv] is recommended. @@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.0 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.0/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.1 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.1/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index 1ed2876..e57fc75 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.5.0", + version="2.5.1", packages=find_packages(), install_requires=[ "requests>=2.21.0", From f0562049b6e681f60bd4465c0d8610675d6aaaa8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 30 Dec 2020 17:18:04 +0100 Subject: [PATCH 043/524] Remove Python 3.9 method in crawler --- PFERD/ilias/crawler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index 86bf045..93b626e 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -281,7 +281,10 @@ class IliasCrawler: result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)] continue - rest_of_name = meeting_name.removeprefix(date_portion_str) + rest_of_name = meeting_name + if rest_of_name.startswith(date_portion_str): + rest_of_name = rest_of_name[len(date_portion_str):] + new_name = datetime.datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") \ + rest_of_name new_path = Path(folder_path, _sanitize_path_name(new_name)) From 5de68a0400e478a9912eb6b78e317ad9f5ee8eb1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: 
Wed, 30 Dec 2020 17:20:30 +0100 Subject: [PATCH 044/524] Bump version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 59aaaa2..2b760e0 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.1 +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.2 ``` The use of [venv] is recommended. @@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.1 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.1/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.2 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.2/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index e57fc75..8d672a4 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.5.1", + version="2.5.2", packages=find_packages(), install_requires=[ "requests>=2.21.0", From fb78a6e98e972dfccd7b367810f26d68d743e088 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 6 Jan 2021 12:29:24 +0100 Subject: [PATCH 045/524] Retry ILIAS downloads a few times and only fail that file --- PFERD/ilias/downloader.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py index 82527a0..26e1f2d 100644 --- a/PFERD/ilias/downloader.py +++ b/PFERD/ilias/downloader.py @@ -122,9 +122,22 @@ class IliasDownloader: tmp_file = self._tmp_dir.new_path() - while not self._try_download(info, tmp_file): - LOGGER.info("Retrying download: %r", info) - 
self._authenticator.authenticate(self._session) + download_successful = False + for _ in range(0, 3): + try: + if not self._try_download(info, tmp_file): + LOGGER.info("Re-Authenticating due to download failure: %r", info) + self._authenticator.authenticate(self._session) + else: + download_successful = True + break + except IOError as e: + PRETTY.warning(f"I/O Error when downloading ({e}). Retrying...",) + LOGGER.info("Retrying download for %s", info.path) + + if not download_successful: + PRETTY.error(f"Download of file {info.path} failed too often! Skipping it...") + return dst_path = self._organizer.accept_file(tmp_file, info.path) if dst_path and info.modification_date: From 0b606f02fa1a791eb4f19f5809452624f0c89aaa Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 17 Jan 2021 10:33:10 +0100 Subject: [PATCH 046/524] Bump version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2b760e0..ed92500 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.2 +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.3 ``` The use of [venv] is recommended. 
@@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.2 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.2/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.3 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.3/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index 8d672a4..78f82be 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.5.2", + version="2.5.3", packages=find_packages(), install_requires=[ "requests>=2.21.0", From 35c3fa205ddd44ac8ccbd989155b6a0074323169 Mon Sep 17 00:00:00 2001 From: Toorero Date: Thu, 28 Jan 2021 21:24:09 +0100 Subject: [PATCH 047/524] Fixed description of activating venv (#22) Add 'source' to the venv activate command in the readme `source` was picked over `.` to conform to the python recommendation (https://docs.python.org/3/library/venv.html#module-venv). This patch also adds the `egg-info` you get when building to the gitignore. 
--- .gitignore | 1 + README.md | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index fbb852b..a5f87ba 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ build/ .env .vscode ilias_cookies.txt +PFERD.egg-info/ # PyInstaller sync_url.spec diff --git a/README.md b/README.md index ed92500..44138db 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ A full example setup and initial use could look like: $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv -$ .venv/bin/activate +$ source .venv/bin/activate $ pip install git+https://github.com/Garmelon/PFERD@v2.5.3 $ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.3/example_config.py $ python3 example_config.py @@ -69,7 +69,7 @@ $ deactivate Subsequent runs of the program might look like: ``` $ cd Vorlesungen -$ .venv/bin/activate +$ source .venv/bin/activate $ python3 example_config.py $ deactivate ``` From 83b75e8254d5ed36a629ac35e901772c66066691 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 6 Feb 2021 22:51:08 +0100 Subject: [PATCH 048/524] syncurl: Sanitize element name on windows if it is used as folder name Otherwise the name of the course might not be a valid file name. --- sync_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sync_url.py b/sync_url.py index c6db255..ab079c3 100755 --- a/sync_url.py +++ b/sync_url.py @@ -113,7 +113,7 @@ def main() -> None: if not element_name: print("Error, could not get element name."
Please specify a folder yourself.") return - folder = Path(element_name) + folder = sanitize_windows_path(Path(element_name.replace("/", "-").replace("\\", "-"))) cookie_jar.save_cookies() else: folder = Path(args.folder) From 9a9018751ec3dcbd62a4334cca906d82422909ba Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 6 Feb 2021 22:54:05 +0100 Subject: [PATCH 049/524] Bump version --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 44138db..221e8c4 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.3 +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.4 ``` The use of [venv] is recommended. @@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ source .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.3 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.3/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.5.4 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.4/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index 78f82be..70a9107 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.5.3", + version="2.5.4", packages=find_packages(), install_requires=[ "requests>=2.21.0", From 946b7a7931c8dc5c70edbc86e45d5d8e96b638a4 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 9 Feb 2021 12:30:59 +0100 Subject: [PATCH 050/524] Also crawl .c/.java/.zip from IPD page --- PFERD/ipd.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/PFERD/ipd.py b/PFERD/ipd.py index d602e0e..ece6a97 
100644 --- a/PFERD/ipd.py +++ b/PFERD/ipd.py @@ -82,7 +82,10 @@ class IpdCrawler: items: List[IpdDownloadInfo] = [] - for link in page.findAll(name="a", attrs={"href": lambda x: x and x.endswith("pdf")}): + def is_relevant_url(x: str) -> bool: + return x.endswith(".pdf") or x.endswith(".c") or x.endswith(".java") or x.endswith(".zip") + + for link in page.findAll(name="a", attrs={"href": lambda x: x and is_relevant_url(x)}): href: str = link.attrs.get("href") name = href.split("/")[-1] From e2bf84392bcbe89ae60d771705af014495343b27 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 8 Apr 2021 18:12:27 +0200 Subject: [PATCH 051/524] [sync_url] Properly declare "no-videos" as flag --- sync_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sync_url.py b/sync_url.py index ab079c3..dd88bf7 100755 --- a/sync_url.py +++ b/sync_url.py @@ -74,7 +74,7 @@ def main() -> None: "one line in the following format: ':'") parser.add_argument("-k", "--keyring", action="store_true", help="Use the system keyring service for authentication") - parser.add_argument('--no-videos', nargs='?', default=None, help="Don't download videos") + parser.add_argument('--no-videos', action="store_true", help="Don't download videos") parser.add_argument('--local-first', action="store_true", help="Don't prompt for confirmation, keep existing files") parser.add_argument('--remote-first', action="store_true", From 14cdfb6a690d55a0e83c028ec857c4aab7686d93 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 13 Apr 2021 11:19:51 +0200 Subject: [PATCH 052/524] Fix typo in date demangler doc --- PFERD/ilias/date_demangler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/ilias/date_demangler.py b/PFERD/ilias/date_demangler.py index 9c1fc8d..2950d4d 100644 --- a/PFERD/ilias/date_demangler.py +++ b/PFERD/ilias/date_demangler.py @@ -20,7 +20,7 @@ def demangle_date(date: str) -> Optional[datetime.datetime]: "Gestern, HH:MM" "Heute, HH:MM" "Morgen, HH:MM" - 
"dd. mon.yyyy, HH:MM + "dd. mon yyyy, HH:MM """ saved = locale.setlocale(locale.LC_ALL) try: From 1f2af3a2909e1979d22834652e28662ba4db754b Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 13 Apr 2021 11:32:55 +0200 Subject: [PATCH 053/524] Retry on more I/O Errors --- PFERD/errors.py | 18 ++++++++++++++++++ PFERD/ilias/crawler.py | 3 ++- PFERD/ilias/downloader.py | 24 +++++++++++------------- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/PFERD/errors.py b/PFERD/errors.py index d003314..d960e13 100644 --- a/PFERD/errors.py +++ b/PFERD/errors.py @@ -37,3 +37,21 @@ def swallow_and_print_errors(function: TFun) -> TFun: Console().print_exception() return None return cast(TFun, inner) + + +def retry_on_io_exception(max_retries: int, message: str) -> Callable[[TFun], TFun]: + """ + Decorates a function and retries it on any exception until the max retries count is hit. + """ + def retry(function: TFun) -> TFun: + def inner(*args: Any, **kwargs: Any) -> Any: + for i in range(0, max_retries): + # pylint: disable=broad-except + try: + return function(*args, **kwargs) + except IOError as error: + PRETTY.warning(f"Error duing operation '{message}': {error}") + PRETTY.warning( + f"Retrying operation '{message}'. 
Remaining retries: {max_retries - 1 - i}") + return cast(TFun, inner) + return retry diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index 93b626e..edab284 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -15,7 +15,7 @@ from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit, import bs4 import requests -from ..errors import FatalException +from ..errors import FatalException, retry_on_io_exception from ..logging import PrettyLogger from ..utils import soupify from .authenticators import IliasAuthenticator @@ -625,6 +625,7 @@ class IliasCrawler: return results + @retry_on_io_exception(3, "fetching webpage") def _get_page(self, url: str, params: Dict[str, Any], retry_count: int = 0) -> bs4.BeautifulSoup: """ diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py index 26e1f2d..f6132bf 100644 --- a/PFERD/ilias/downloader.py +++ b/PFERD/ilias/downloader.py @@ -10,6 +10,7 @@ from typing import Callable, List, Optional, Union import bs4 import requests +from ..errors import retry_on_io_exception from ..logging import PrettyLogger from ..organizer import Organizer from ..tmp_dir import TmpDir @@ -116,26 +117,23 @@ class IliasDownloader: """ LOGGER.debug("Downloading %r", info) + if not self._strategy(self._organizer, info): self._organizer.mark(info.path) return tmp_file = self._tmp_dir.new_path() - download_successful = False - for _ in range(0, 3): - try: - if not self._try_download(info, tmp_file): - LOGGER.info("Re-Authenticating due to download failure: %r", info) - self._authenticator.authenticate(self._session) - else: - download_successful = True - break - except IOError as e: - PRETTY.warning(f"I/O Error when downloading ({e}). 
Retrying...",) - LOGGER.info("Retrying download for %s", info.path) + @retry_on_io_exception(3, "downloading file") + def download_impl() -> bool: + if not self._try_download(info, tmp_file): + LOGGER.info("Re-Authenticating due to download failure: %r", info) + self._authenticator.authenticate(self._session) + raise IOError("Scheduled retry") + else: + return True - if not download_successful: + if not download_impl(): PRETTY.error(f"Download of file {info.path} failed too often! Skipping it...") return From 4f480d117e11f9fc2c04e7674efb77b0899619b6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 14 Apr 2021 19:24:05 +0200 Subject: [PATCH 054/524] Install keyring in CI --- .github/workflows/package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index 1c0c353..615917b 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -23,7 +23,7 @@ jobs: python-version: '3.x' - name: "Install dependencies" - run: "pip install setuptools pyinstaller rich requests beautifulsoup4 -f --upgrade" + run: "pip install setuptools keyring pyinstaller rich requests beautifulsoup4 -f --upgrade" - name: "Install sync_url.py" run: "pyinstaller sync_url.py -F" From 80ae5ddfaa87f3d5c7fe54f656c0083b9d818f82 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 14 Apr 2021 19:47:41 +0200 Subject: [PATCH 055/524] Bump version to v2.6.0 --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 221e8c4..b01bbc9 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. 
To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.4 +$ pip install git+https://github.com/Garmelon/PFERD@v2.6.0 ``` The use of [venv] is recommended. @@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ source .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.5.4 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.5.4/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.6.0 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.6.0/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index 70a9107..bdb7754 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.5.4", + version="2.6.0", packages=find_packages(), install_requires=[ "requests>=2.21.0", From 7cc40595dc0fcd4e05a48f5ce8ba7d77f322a284 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 14 Apr 2021 20:25:25 +0200 Subject: [PATCH 056/524] Allow synchronizing to directory "." 
--- sync_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sync_url.py b/sync_url.py index dd88bf7..06a94b3 100755 --- a/sync_url.py +++ b/sync_url.py @@ -121,7 +121,7 @@ def main() -> None: # files may not escape the pferd_root with relative paths # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path pferd_root = Path(Path.cwd(), Path(folder)).parent - target = folder.name + target = folder.resolve().name pferd = Pferd(pferd_root, test_run=args.test_run) def dir_filter(_: Path, element: IliasElementType) -> bool: From 6d5d9333ad7f8aed4fdce2203134989beb883df9 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 19 Apr 2021 11:07:25 +0200 Subject: [PATCH 057/524] Force folder to be file-system path --- sync_url.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sync_url.py b/sync_url.py index 06a94b3..ca78de0 100755 --- a/sync_url.py +++ b/sync_url.py @@ -121,7 +121,8 @@ def main() -> None: # files may not escape the pferd_root with relative paths # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path pferd_root = Path(Path.cwd(), Path(folder)).parent - target = folder.resolve().name + # Folder might be a *PurePath* at this point + target = Path(folder).resolve().name pferd = Pferd(pferd_root, test_run=args.test_run) def dir_filter(_: Path, element: IliasElementType) -> bool: From 29cd5d1a3c4cab259636d6ab1e42f38c7a718792 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 19 Apr 2021 11:10:02 +0200 Subject: [PATCH 058/524] Reflect totality of sanitize_windows_path in return type --- PFERD/transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/transform.py b/PFERD/transform.py index 7a05dd1..a2152ba 100644 --- a/PFERD/transform.py +++ b/PFERD/transform.py @@ -130,7 +130,7 @@ def re_rename(regex: Regex, target: str) -> Transform: return inner -def sanitize_windows_path(path: PurePath) -> Optional[PurePath]: +def sanitize_windows_path(path: 
PurePath) -> PurePath: """ A small function to escape characters that are forbidden in windows path names. This method is a no-op on other operating systems. From c1ab7485e248c9bbcfa40405ba9a2dd1713784d1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 19 Apr 2021 11:21:56 +0200 Subject: [PATCH 059/524] Bump version to 2.6.1 --- README.md | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b01bbc9..178fbac 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Ensure that you have at least Python 3.8 installed. To install PFERD or update your installation to the latest version, run this wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.6.0 +$ pip install git+https://github.com/Garmelon/PFERD@v2.6.1 ``` The use of [venv] is recommended. @@ -60,8 +60,8 @@ $ mkdir Vorlesungen $ cd Vorlesungen $ python3 -m venv .venv $ source .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.6.0 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.6.0/example_config.py +$ pip install git+https://github.com/Garmelon/PFERD@v2.6.1 +$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.6.1/example_config.py $ python3 example_config.py $ deactivate ``` diff --git a/setup.py b/setup.py index bdb7754..a4dfab3 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="PFERD", - version="2.6.0", + version="2.6.1", packages=find_packages(), install_requires=[ "requests>=2.21.0", From 27e4abcfa32309eb3dd61ce20a4d65fa22a477a1 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 26 Apr 2021 23:46:44 +0200 Subject: [PATCH 060/524] Do project setup from scratch Following guidelines from the Python Packaging User Guide [1]. This commit intentionally breaks the .gitignore, project dependencies, GitHub Actions and other stuff. It also removes almost the entire README. 
The intention behind this is to get rid of all cruft that has accumulated over time and to have a fresh start. Only necessary things will be re-added as they're needed. From now on, I also plan on adding documentation for every feature at the same time that the feature is implemented. This is to ensure that the documentation does not become outdated. [1]: https://packaging.python.org/ --- .github/workflows/package.yml | 74 --------- .gitignore | 17 +- DEV.md | 37 +++++ README.md | 251 +---------------------------- example_config.py | 131 --------------- example_config_personal_desktop.py | 38 ----- pyproject.toml | 3 + requirements.txt | 4 - setup.cfg | 7 + setup.py | 17 -- 10 files changed, 56 insertions(+), 523 deletions(-) delete mode 100644 .github/workflows/package.yml create mode 100644 DEV.md delete mode 100644 example_config.py delete mode 100644 example_config_personal_desktop.py create mode 100644 pyproject.toml delete mode 100644 requirements.txt create mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml deleted file mode 100644 index 615917b..0000000 --- a/.github/workflows/package.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Package Application with Pyinstaller - -on: - push: - branches: - - "*" - tags: - - "v*" - -jobs: - build: - - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - - steps: - - uses: actions/checkout@v2 - - - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - name: "Install dependencies" - run: "pip install setuptools keyring pyinstaller rich requests beautifulsoup4 -f --upgrade" - - - name: "Install sync_url.py" - run: "pyinstaller sync_url.py -F" - - - name: "Move artifact" - run: "mv dist/sync_url* dist/sync_url-${{ matrix.os }}" - - - uses: actions/upload-artifact@v2 - with: - name: "Pferd Sync URL" - path: "dist/sync_url*" - - release: - name: Release - - needs: [build] - runs-on: ubuntu-latest
- if: startsWith(github.ref, 'refs/tags/') - - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - steps: - - name: "Checkout" - uses: actions/checkout@v2 - - - name: "Download artifacts" - uses: actions/download-artifact@v2 - with: - name: "Pferd Sync URL" - - - name: "look at folder structure" - run: "ls -lah" - - - name: "Rename releases" - run: "mv sync_url-macos-latest pferd_sync_url_mac && mv sync_url-ubuntu-latest pferd_sync_url_linux && mv sync_url-windows-latest pferd_sync_url.exe" - - - name: "Create release" - uses: softprops/action-gh-release@v1 - - - name: "Upload release artifacts" - uses: softprops/action-gh-release@v1 - with: - body: "Download the correct sync_url for your platform and run it in the terminal or CMD. You might need to make it executable on Linux/Mac with `chmod +x `. Also please enclose the *url you pass to the program in double quotes* or your shell might silently screw it up!" - files: | - pferd_sync_url_mac - pferd_sync_url_linux - pferd_sync_url.exe diff --git a/.gitignore b/.gitignore index a5f87ba..bd8bab9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,15 +1,2 @@ -__pycache__/ -.venv/ -venv/ -.idea/ -build/ -.mypy_cache/ -.tmp/ -.env -.vscode -ilias_cookies.txt -PFERD.egg-info/ - -# PyInstaller -sync_url.spec -dist/ +/.mypy_cache/ +/.venv/ diff --git a/DEV.md b/DEV.md new file mode 100644 index 0000000..a679b4a --- /dev/null +++ b/DEV.md @@ -0,0 +1,37 @@ +# PFERD Development Guide + +PFERD is packaged following the [Python Packaging User Guide][ppug] (in +particular [this][ppug-1] and [this][ppug-2] guide). + +[ppug]: "Python Packaging User Guide" +[ppug-1]: "Packaging Python Projects" +[ppug-2]: "Packaging and distributing projects" + +## Setting up a dev environment + +The use of [venv][venv] is recommended. To initially set up a development +environment, run these commands in the same directory as this file: + +``` +$ python -m venv .venv +$ . .venv/bin/activate +$ pip install --editable . 
+``` + +After this, you can use PFERD as if it was installed normally. Since PFERD was +installed with `--editable`, there is no need to re-run `pip install` when the +source code is changed. + +For more details, see [this part of the Python Tutorial][venv-tut] and +[this section on "development mode"][ppug-dev]. + +[venv]: "venv - Creation of virtual environments" +[venv-tut]: "12. Virtual Environments and Packages" +[ppug-dev]: "Working in “development mode”" + +## Contributing + +When submitting a PR that adds, changes or modifies a feature, please ensure +that the corresponding documentation is updated. + +In your first PR, please add your name to the `LICENSE` file. diff --git a/README.md b/README.md index 178fbac..5b74de5 100644 --- a/README.md +++ b/README.md @@ -2,254 +2,17 @@ **P**rogramm zum **F**lotten, **E**infachen **R**unterladen von **D**ateien -- [Quickstart with `sync_url`](#quickstart-with-sync_url) -- [Installation](#installation) - - [Upgrading from 2.0.0 to 2.1.0+](#upgrading-from-200-to-210) -- [Example setup](#example-setup) -- [Usage](#usage) - - [General concepts](#general-concepts) - - [Constructing transforms](#constructing-transforms) - - [Transform creators](#transform-creators) - - [Transform combinators](#transform-combinators) - - [A short, but commented example](#a-short-but-commented-example) +Other resources: -## Quickstart with `sync_url` +- [Development Guide](DEV.md) -The `sync_url` program allows you to just synchronize a given ILIAS URL (of a -course, a folder, your personal desktop, etc.) without any extra configuration -or setting up. Download the program, open ILIAS, copy the URL from the address -bar and pass it to sync_url. +## Installation with pip -It bundles everything it needs in one executable and is easy to -use, but doesn't expose all the configuration options and tweaks a full install -does. +Ensure you have at least Python 3.8 installed. 
Run the following command to +install PFERD or upgrade it to the latest version: -1. Download the `sync_url` binary from the [latest release](https://github.com/Garmelon/PFERD/releases/latest). -2. Recognize that you most likely need to enclose the URL in `""` quotes to prevent your shell from interpreting `&` and other symbols -3. Run the binary in your terminal (`./sync_url` or `sync_url.exe` in the CMD) to see the help and use it. I'd recommend using the `--cookies` option. - If you are on **Linux/Mac**, you need to *make the file executable* using `chmod +x `. - If you are on **Mac**, you need to allow this unverified program to run (see e.g. [here](https://www.switchingtomac.com/tutorials/osx/how-to-run-unverified-apps-on-macos/)) - -## Installation - -Ensure that you have at least Python 3.8 installed. - -To install PFERD or update your installation to the latest version, run this -wherever you want to install or have already installed PFERD: ``` -$ pip install git+https://github.com/Garmelon/PFERD@v2.6.1 +$ pip install --upgrade git+https://github.com/Garmelon/PFERD@latest ``` -The use of [venv] is recommended. - -[venv]: https://docs.python.org/3/library/venv.html - -### Upgrading from 2.0.0 to 2.1.0+ - -- The `IliasDirectoryType` type was renamed to `IliasElementType` and is now far more detailed. - The new values are: `REGULAR_FOLDER`, `VIDEO_FOLDER`, `EXERCISE_FOLDER`, `REGULAR_FILE`, `VIDEO_FILE`, `FORUM`, `EXTERNAL_LINK`. -- Forums and external links are skipped automatically if you use the `kit_ilias` helper. - -## Example setup - -In this example, `python3` refers to at least Python 3.8. 
- -A full example setup and initial use could look like: -``` -$ mkdir Vorlesungen -$ cd Vorlesungen -$ python3 -m venv .venv -$ source .venv/bin/activate -$ pip install git+https://github.com/Garmelon/PFERD@v2.6.1 -$ curl -O https://raw.githubusercontent.com/Garmelon/PFERD/v2.6.1/example_config.py -$ python3 example_config.py -$ deactivate -``` - -Subsequent runs of the program might look like: -``` -$ cd Vorlesungen -$ source .venv/bin/activate -$ python3 example_config.py -$ deactivate -``` - -If you just want to get started and crawl *your entire ILIAS Desktop* instead -of a given set of courses, please replace `example_config.py` with -`example_config_personal_desktop.py` in all of the instructions below (`curl` call and -`python3` run command). - -## Usage - -### General concepts - -A PFERD config is a normal python file that starts multiple *synchronizers* -which do all the heavy lifting. While you can create and wire them up manually, -you are encouraged to use the helper methods provided in `PFERD.Pferd`. - -The synchronizers take some input arguments specific to their service and a -*transform*. The transform receives the computed path of an element in ILIAS and -can return either an output path (so you can rename files or move them around as -you wish) or `None` if you do not want to save the given file. - -Additionally the ILIAS synchronizer allows you to define a *crawl filter*. This -filter also receives the computed path as the input, but is only called for -*directories*. If you return `True`, the directory will be crawled and -searched. If you return `False` the directory will be ignored and nothing in it -will be passed to the transform. - -### Constructing transforms - -While transforms are just normal python functions, writing them by hand can -quickly become tedious. 
In order to help you with writing your own transforms -and filters, PFERD defines a few useful transform creators and combinators in -the `PFERD.transform` module: - -#### Transform creators - -These methods let you create a few basic transform building blocks: - -- **`glob(glob)`** - Creates a transform that returns the unchanged path if the glob matches the path and `None` otherwise. - See also [Path.match]. - Example: `glob("Übung/*.pdf")` -- **`predicate(pred)`** - Creates a transform that returns the unchanged path if `pred(path)` returns a truthy value. - Returns `None` otherwise. - Example: `predicate(lambda path: len(path.parts) == 3)` -- **`move_dir(source, target)`** - Creates a transform that moves all files from the `source` to the `target` directory. - Example: `move_dir("Übung/", "Blätter/")` -- **`move(source, target)`** - Creates a transform that moves the `source` file to `target`. - Example: `move("Vorlesung/VL02_Automten.pdf", "Vorlesung/VL02_Automaten.pdf")` -- **`rename(source, target)`** - Creates a transform that renames all files named `source` to `target`. - This transform works on the file names, not paths, and thus works no matter where the file is located. - Example: `rename("VL02_Automten.pdf", "VL02_Automaten.pdf")` -- **`re_move(regex, target)`** - Creates a transform that moves all files matching `regex` to `target`. - The transform `str.format` on the `target` string with the contents of the capturing groups before returning it. - The capturing groups can be accessed via their index. - See also [Match.group]. - Example: `re_move(r"Übung/Blatt (\d+)\.pdf", "Blätter/Blatt_{1:0>2}.pdf")` -- **`re_rename(regex, target)`** - Creates a transform that renames all files matching `regex` to `target`. - This transform works on the file names, not paths, and thus works no matter where the file is located. 
- Example: `re_rename(r"VL(\d+)(.*)\.pdf", "Vorlesung_Nr_{1}__{2}.pdf")` - -All movement or rename transforms above return `None` if a file doesn't match -their movement or renaming criteria. This enables them to be used as building -blocks to build up more complex transforms. - -In addition, `PFERD.transform` also defines the `keep` transform which returns its input path unchanged. -This behaviour can be very useful when creating more complex transforms. -See below for example usage. - -[Path.match]: https://docs.python.org/3/library/pathlib.html#pathlib.Path.match -[Match.group]: https://docs.python.org/3/library/re.html#re.Match.group - -#### Transform combinators - -These methods let you combine transforms into more complex transforms: - -- **`optionally(transform)`** - Wraps a given transform and returns its result if it is not `None`. - Otherwise returns the input path unchanged. - See below for example usage. -* **`do(transforms)`** - Accepts a series of transforms and applies them in the given order to the result of the previous one. - If any transform returns `None`, `do` short-circuits and also returns `None`. - This can be used to perform multiple renames in a row: - ```py - do( - # Move them - move_dir("Vorlesungsmaterial/Vorlesungsvideos/", "Vorlesung/Videos/"), - # Fix extensions (if they have any) - optionally(re_rename("(.*).m4v.mp4", "{1}.mp4")), - # Remove the 'dbs' prefix (if they have any) - optionally(re_rename("(?i)dbs-(.+)", "{1}")), - ) - ``` -- **`attempt(transforms)`** - Applies the passed transforms in the given order until it finds one that does not return `None`. - If it does not find any, it returns `None`. - This can be used to give a list of possible transformations and automatically pick the first one that fits: - ```py - attempt( - # Move all videos. If a video is passed in, this `re_move` will succeed - # and attempt short-circuits with the result. 
- re_move(r"Vorlesungsmaterial/.*/(.+?)\.mp4", "Vorlesung/Videos/{1}.mp4"), - # Move the whole folder to a nicer name - now without any mp4! - move_dir("Vorlesungsmaterial/", "Vorlesung/"), - # If we got another file, keep it. - keep, - ) - ``` - -All of these combinators are used in the provided example configs, if you want -to see some more real-life usages. - -### A short, but commented example - -```py -from pathlib import Path, PurePath -from PFERD import Pferd -from PFERD.ilias import IliasElementType -from PFERD.transform import * - -# This filter will later be used by the ILIAS crawler to decide whether it -# should crawl a directory (or directory-like structure). -def filter_course(path: PurePath, type: IliasElementType) -> bool: - # Note that glob returns a Transform, which is a function from PurePath -> - # Optional[PurePath]. Because of this, we need to apply the result of - # 'glob' to our input path. The returned value will be truthy (a Path) if - # the transform succeeded, or `None` if it failed. - - # We need to crawl the 'Tutorien' folder as it contains one that we want. - if glob("Tutorien/")(path): - return True - # If we found 'Tutorium 10', keep it! - if glob("Tutorien/Tutorium 10")(path): - return True - # Discard all other folders inside 'Tutorien' - if glob("Tutorien/*")(path): - return False - - # All other dirs (including subdirs of 'Tutorium 10') should be searched :) - return True - - -# This transform will later be used to rename a few files. It can also be used -# to ignore some files. -transform_course = attempt( - # We don't care about the other tuts and would instead prefer a cleaner - # directory structure. - move_dir("Tutorien/Tutorium 10/", "Tutorium/"), - # We don't want to modify any other files, so we're going to keep them - # exactly as they are. - keep -) - -# Enable and configure the text output. Needs to be called before calling any -# other PFERD methods. 
-Pferd.enable_logging() -# Create a Pferd instance rooted in the same directory as the script file. This -# is not a test run, so files will be downloaded (default, can be omitted). -pferd = Pferd(Path(__file__).parent, test_run=False) - -# Use the ilias_kit helper to synchronize an ILIAS course -pferd.ilias_kit( - # The directory that all of the downloaded files should be placed in - "My_cool_course/", - # The course ID (found in the URL when on the course page in ILIAS) - "course id", - # A path to a cookie jar. If you synchronize multiple ILIAS courses, - # setting this to a common value requires you to only log in once. - cookies=Path("ilias_cookies.txt"), - # A transform can rename, move or filter out certain files - transform=transform_course, - # A crawl filter limits what paths the cralwer searches - dir_filter=filter_course, -) -``` +The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. diff --git a/example_config.py b/example_config.py deleted file mode 100644 index bffecfb..0000000 --- a/example_config.py +++ /dev/null @@ -1,131 +0,0 @@ -import argparse -from pathlib import Path, PurePath - -from PFERD import Pferd -from PFERD.ilias import IliasElementType -from PFERD.transform import (attempt, do, glob, keep, move, move_dir, - optionally, re_move, re_rename) - -tf_ss_2020_numerik = attempt( - re_move(r"Übungsblätter/(\d+)\. 
Übungsblatt/.*", "Blätter/Blatt_{1:0>2}.pdf"), - keep, -) - - -tf_ss_2020_db = attempt( - move_dir("Begrüßungsvideo/", "Vorlesung/Videos/"), - do( - move_dir("Vorlesungsmaterial/Vorlesungsvideos/", "Vorlesung/Videos/"), - optionally(re_rename("(.*).m4v.mp4", "{1}.mp4")), - optionally(re_rename("(?i)dbs-(.+)", "{1}")), - ), - move_dir("Vorlesungsmaterial/", "Vorlesung/"), - keep, -) - - -tf_ss_2020_rechnernetze = attempt( - re_move(r"Vorlesungsmaterial/.*/(.+?)\.mp4", "Vorlesung/Videos/{1}.mp4"), - move_dir("Vorlesungsmaterial/", "Vorlesung/"), - keep, -) - - -tf_ss_2020_sicherheit = attempt( - move_dir("Vorlesungsvideos/", "Vorlesung/Videos/"), - move_dir("Übungsvideos/", "Übung/Videos/"), - re_move(r"VL(.*)\.pdf", "Vorlesung/{1}.pdf"), - re_move(r"Übungsblatt (\d+)\.pdf", "Blätter/Blatt_{1:0>2}.pdf"), - move("Chiffrat.txt", "Blätter/Blatt_01_Chiffrat.txt"), - keep, -) - - -tf_ss_2020_pg = attempt( - move_dir("Vorlesungsaufzeichnungen/", "Vorlesung/Videos/"), - move_dir("Vorlesungsmaterial/", "Vorlesung/"), - re_move(r"Übungen/uebungsblatt(\d+).pdf", "Blätter/Blatt_{1:0>2}.pdf"), - keep, -) - - -def df_ss_2020_or1(path: PurePath, _type: IliasElementType) -> bool: - if glob("Tutorien/")(path): - return True - if glob("Tutorien/Tutorium 10, dienstags 15:45 Uhr/")(path): - return True - if glob("Tutorien/*")(path): - return False - return True - - -tf_ss_2020_or1 = attempt( - move_dir("Vorlesung/Unbeschriebene Folien/", "Vorlesung/Folien/"), - move_dir("Video zur Organisation/", "Vorlesung/Videos/"), - keep, -) - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument("--test-run", action="store_true") - parser.add_argument("synchronizers", nargs="*") - args = parser.parse_args() - - pferd = Pferd(Path(__file__).parent, test_run=args.test_run) - pferd.enable_logging() - - if not args.synchronizers or "numerik" in args.synchronizers: - pferd.ilias_kit( - target="Numerik", - course_id="1083036", - transform=tf_ss_2020_numerik, - 
cookies="ilias_cookies.txt", - ) - - if not args.synchronizers or "db" in args.synchronizers: - pferd.ilias_kit( - target="DB", - course_id="1101554", - transform=tf_ss_2020_db, - cookies="ilias_cookies.txt", - ) - - if not args.synchronizers or "rechnernetze" in args.synchronizers: - pferd.ilias_kit( - target="Rechnernetze", - course_id="1099996", - transform=tf_ss_2020_rechnernetze, - cookies="ilias_cookies.txt", - ) - - if not args.synchronizers or "sicherheit" in args.synchronizers: - pferd.ilias_kit( - target="Sicherheit", - course_id="1101980", - transform=tf_ss_2020_sicherheit, - cookies="ilias_cookies.txt", - ) - - if not args.synchronizers or "pg" in args.synchronizers: - pferd.ilias_kit( - target="PG", - course_id="1106095", - transform=tf_ss_2020_pg, - cookies="ilias_cookies.txt", - ) - - if not args.synchronizers or "or1" in args.synchronizers: - pferd.ilias_kit( - target="OR1", - course_id="1105941", - dir_filter=df_ss_2020_or1, - transform=tf_ss_2020_or1, - cookies="ilias_cookies.txt", - ) - - # Prints a summary listing all new, modified or deleted files - pferd.print_summary() - -if __name__ == "__main__": - main() diff --git a/example_config_personal_desktop.py b/example_config_personal_desktop.py deleted file mode 100644 index 8d481b4..0000000 --- a/example_config_personal_desktop.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -This is a small config that just crawls the ILIAS Personal Desktop. -It does not filter or rename anything, it just gobbles up everything it can find. - -Note that this still includes a test-run switch, so you can see what it *would* download. -You can enable that with the "--test-run" command line switch, -i. e. "python3 example_config_minimal.py --test-run". 
-""" - -import argparse -from pathlib import Path - -from PFERD import Pferd - - -def main() -> None: - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument("--test-run", action="store_true") - args = parser.parse_args() - - # Create the Pferd helper instance - pferd = Pferd(Path(__file__).parent, test_run=args.test_run) - pferd.enable_logging() - - # Synchronize the personal desktop into the "ILIAS" directory. - # It saves the cookies, so you only need to log in again when the ILIAS cookies expire. - pferd.ilias_kit_personal_desktop( - "ILIAS", - cookies="ilias_cookies.txt", - ) - - # Prints a summary listing all new, modified or deleted files - pferd.print_summary() - - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9787c3b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 2d852e1..0000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -requests>=2.21.0 -beautifulsoup4>=4.7.1 -rich>=2.1.0 -keyring>=21.5.0 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..6d01c03 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,7 @@ +[metadata] +name = PFERD +version = 3.0.0 + +[options] +packages = PFERD +python_requires = >=3.8 diff --git a/setup.py b/setup.py deleted file mode 100644 index a4dfab3..0000000 --- a/setup.py +++ /dev/null @@ -1,17 +0,0 @@ -from setuptools import find_packages, setup - -setup( - name="PFERD", - version="2.6.1", - packages=find_packages(), - install_requires=[ - "requests>=2.21.0", - "beautifulsoup4>=4.7.1", - "rich>=2.1.0", - "keyring>=21.5.0" - ], -) - -# When updating the version, also: -# - update the README.md installation instructions -# - set a tag on the update commit From 5595a908d883f1f3da5a14790017085002e5d3e4 Mon 
Sep 17 00:00:00 2001 From: Joscha Date: Tue, 27 Apr 2021 00:29:42 +0200 Subject: [PATCH 061/524] Configure entry point --- .gitignore | 2 ++ PFERD/__init__.py | 10 ++-------- setup.cfg | 4 ++++ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index bd8bab9..9ee2832 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /.mypy_cache/ /.venv/ +/PFERD.egg-info/ +__pycache__/ diff --git a/PFERD/__init__.py b/PFERD/__init__.py index 059f585..b657171 100644 --- a/PFERD/__init__.py +++ b/PFERD/__init__.py @@ -1,8 +1,2 @@ -# pylint: disable=invalid-name - -""" -This module exports only what you need for a basic configuration. If you want a -more complex configuration, you need to import the other submodules manually. -""" - -from .pferd import Pferd +def main() -> None: + print("Hello world") diff --git a/setup.cfg b/setup.cfg index 6d01c03..db60477 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,3 +5,7 @@ version = 3.0.0 [options] packages = PFERD python_requires = >=3.8 + +[options.entry_points] +console_scripts = + pferd = PFERD:main From fbebc46c580c02562f270d8bae23f28d2d8e540a Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 27 Apr 2021 12:41:49 +0200 Subject: [PATCH 062/524] Load and dump config --- .gitignore | 2 +- PFERD/__init__.py | 40 +++++++++++++++++- PFERD/config.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++ PFERD/utils.py | 97 ++++++-------------------------------------- 4 files changed, 153 insertions(+), 87 deletions(-) create mode 100644 PFERD/config.py diff --git a/.gitignore b/.gitignore index 9ee2832..c888722 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -/.mypy_cache/ +.mypy_cache/ /.venv/ /PFERD.egg-info/ __pycache__/ diff --git a/PFERD/__init__.py b/PFERD/__init__.py index b657171..7b3a3c1 100644 --- a/PFERD/__init__.py +++ b/PFERD/__init__.py @@ -1,2 +1,40 @@ +import argparse +from pathlib import Path + +from .config import Config, ConfigDumpException, ConfigLoadException + + def main() -> 
None: - print("Hello world") + parser = argparse.ArgumentParser() + parser.add_argument( + "--config", "-c", + type=Path, + metavar="PATH", + help="specify custom config file path", + ) + parser.add_argument( + "--dump-config", + nargs="?", + const=True, + type=Path, + metavar="PATH", + help="dump current configuration to a file and exit." + " Uses default config file path if no path is specified", + ) + args = parser.parse_args() + + try: + config_parser = Config.load_parser(args.config) + config = Config(config_parser) + except ConfigLoadException: + exit(1) + + if args.dump_config: + path = None if args.dump_config is True else args.dump_config + try: + config.dump(path) + except ConfigDumpException: + exit(1) + exit() + + print(config) diff --git a/PFERD/config.py b/PFERD/config.py new file mode 100644 index 0000000..d71e4d1 --- /dev/null +++ b/PFERD/config.py @@ -0,0 +1,101 @@ +import configparser +import os +from pathlib import Path +from typing import Optional + +from .utils import prompt_yes_no + + +class ConfigLoadException(Exception): + pass + + +class ConfigDumpException(Exception): + pass + + +class Config: + @staticmethod + def _default_path() -> Path: + if os.name == "posix": + return Path("~/.config/PFERD/pferd.cfg").expanduser() + elif os.name == "nt": + return Path("~/AppData/Roaming/PFERD/pferd.cfg").expanduser() + else: + return Path("~/.pferd.cfg").expanduser() + + def __init__(self, parser: configparser.ConfigParser): + self._parser = parser + # TODO Load and validate config into dataclasses + + @staticmethod + def _fail_load(path: Path, reason: str) -> None: + print(f"Failed to load config file at {path}") + print(f"Reason: {reason}") + raise ConfigLoadException() + + @staticmethod + def load_parser(path: Optional[Path] = None) -> configparser.ConfigParser: + """ + May throw a ConfigLoadException. 
+ """ + + if not path: + path = Config._default_path() + + parser = configparser.ConfigParser() + + # Using config.read_file instead of config.read because config.read + # would just ignore a missing file and carry on. + try: + with open(path) as f: + parser.read_file(f, source=str(path)) + except FileNotFoundError: + Config._fail_load(path, "File does not exist") + except IsADirectoryError: + Config._fail_load(path, "That's a directory, not a file") + except PermissionError: + Config._fail_load(path, "Insufficient permissions") + + return parser + + @staticmethod + def _fail_dump(path: Path, reason: str) -> None: + print(f"Failed to dump config file to {path}") + print(f"Reason: {reason}") + raise ConfigDumpException() + + def dump(self, path: Optional[Path] = None) -> None: + """ + May throw a ConfigDumpException. + """ + + if not path: + path = self._default_path() + + print(f"Dumping config to {path}") + + try: + path.parent.mkdir(parents=True, exist_ok=True) + except PermissionError: + self._fail_dump(path, "Could not create parent directory") + + try: + # Ensuring we don't accidentally overwrite any existing files by + # always asking before overwriting a file. + try: + # x = open for exclusive creation, failing if the file already + # exists + with open(path, "x") as f: + self._parser.write(f) + except FileExistsError: + print("That file already exists.") + if prompt_yes_no("Overwrite it?", default=False): + with open(path, "w") as f: + self._parser.write(f) + else: + self._fail_dump(path, "File already exists") + except IsADirectoryError: + self._fail_dump(path, "That's a directory, not a file") + except PermissionError: + self._fail_dump(path, "Insufficient permissions") diff --git a/PFERD/utils.py b/PFERD/utils.py index 56c101a..4e1b5d7 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -1,98 +1,25 @@ -""" -A few utility bobs and bits. 
-""" - -import re -from pathlib import Path, PurePath -from typing import Optional, Tuple, Union - -import bs4 -import requests - -from .progress import ProgressSettings, progress_for, size_from_headers - -PathLike = Union[PurePath, str, Tuple[str, ...]] +from typing import Optional -def to_path(pathlike: PathLike) -> Path: +def prompt_yes_no(query: str, default: Optional[bool]) -> bool: """ - Convert a given PathLike into a Path. - """ - if isinstance(pathlike, tuple): - return Path(*pathlike) - return Path(pathlike) - - -Regex = Union[str, re.Pattern] - - -def to_pattern(regex: Regex) -> re.Pattern: - """ - Convert a regex to a re.Pattern. - """ - if isinstance(regex, re.Pattern): - return regex - return re.compile(regex) - - -def soupify(response: requests.Response) -> bs4.BeautifulSoup: - """ - Wrap a requests response in a bs4 object. - """ - - return bs4.BeautifulSoup(response.text, "html.parser") - - -def stream_to_path( - response: requests.Response, - target: Path, - progress_name: Optional[str] = None, - chunk_size: int = 1024 ** 2 -) -> None: - """ - Download a requests response content to a file by streaming it. This - function avoids excessive memory usage when downloading large files. The - chunk_size is in bytes. - - If progress_name is None, no progress bar will be shown. Otherwise a progress - bar will appear, if the download is bigger than an internal threshold. 
- """ - - with response: - length = size_from_headers(response) - if progress_name and length and int(length) > 1024 * 1024 * 10: # 10 MiB - settings: Optional[ProgressSettings] = ProgressSettings(progress_name, length) - else: - settings = None - - with open(target, 'wb') as file_descriptor: - with progress_for(settings) as progress: - for chunk in response.iter_content(chunk_size=chunk_size): - file_descriptor.write(chunk) - progress.advance(len(chunk)) - - -def prompt_yes_no(question: str, default: Optional[bool] = None) -> bool: - """ - Prompts the user a yes/no question and returns their choice. + Asks the user a yes/no question and returns their choice. """ if default is True: - prompt = "[Y/n]" + query += " [Y/n] " elif default is False: - prompt = "[y/N]" + query += " [y/N] " else: - prompt = "[y/n]" - - text = f"{question} {prompt} " - wrong_reply = "Please reply with 'yes'/'y' or 'no'/'n'." + query += " [y/n] " while True: - response = input(text).strip().lower() - if response in {"yes", "ye", "y"}: + response = input(query).strip().lower() + if response == "y": return True - if response in {"no", "n"}: + elif response == "n": return False - if response == "" and default is not None: + elif response == "" and default is not None: return default - print(wrong_reply) + + print("Please answer with 'y' or 'n'.") From 3a74c23d0991d1ae340f71ec5e3d1ad9fb359916 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 09:51:25 +0200 Subject: [PATCH 063/524] Implement transformer --- PFERD/transformer.py | 238 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 PFERD/transformer.py diff --git a/PFERD/transformer.py b/PFERD/transformer.py new file mode 100644 index 0000000..1ecaf19 --- /dev/null +++ b/PFERD/transformer.py @@ -0,0 +1,238 @@ +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Optional, Union + + +class Rule(ABC): + 
@abstractmethod + def transform(self, path: Path) -> Optional[Path]: + pass + + +class NormalRule(Rule): + def __init__(self, left: Path, right: Path): + self._left = left + self._right = right + + def _match_prefix(self, path: Path) -> Optional[Path]: + left_parts = list(reversed(self._left.parts)) + path_parts = list(reversed(path.parts)) + + if len(left_parts) > len(path_parts): + return None + + while left_parts and path_parts: + left_part = left_parts.pop() + path_part = path_parts.pop() + + if left_part != path_part: + return None + + if left_parts: + return None + + return Path(*path_parts) + + def transform(self, path: Path) -> Optional[Path]: + if rest := self._match_prefix(path): + return self._right / rest + + return None + + +class ExactRule(Rule): + def __init__(self, left: Path, right: Path): + self._left = left + self._right = right + + def transform(self, path: Path) -> Optional[Path]: + if path == self._left: + return self._right + + return None + + +class ReRule(Rule): + def __init__(self, left: str, right: str): + self._left = left + self._right = right + + def transform(self, path: Path) -> Optional[Path]: + if match := re.fullmatch(self._left, str(path)): + kwargs: Dict[str, Union[int, float]] = {} + + groups = [match[0]] + list(match.groups()) + for i, group in enumerate(groups): + try: + kwargs[f"i{i}"] = int(group) + except ValueError: + pass + + try: + kwargs[f"f{i}"] = float(group) + except ValueError: + pass + + return Path(self._right.format(*groups, **kwargs)) + + return None + + +@dataclass +class RuleParseException(Exception): + line: "Line" + reason: str + + def pretty_print(self) -> None: + print(f"Error parsing rule on line {self.line.line_nr}:") + print(self.line.line) + spaces = " " * self.line.index + print(f"{spaces}^--- {self.reason}") + + +class Line: + def __init__(self, line: str, line_nr: int): + self._line = line + self._line_nr = line_nr + self._index = 0 + + def get(self) -> Optional[str]: + if self._index < 
len(self._line): + return self._line[self._index] + + return None + + @property + def line(self) -> str: + return self._line + + @property + def line_nr(self) -> str: + return self._line + + @property + def index(self) -> int: + return self._index + + @index.setter + def index(self, index: int) -> None: + self._index = index + + def advance(self) -> None: + self._index += 1 + + def expect(self, string: str) -> None: + for char in string: + if self.get() == char: + self.advance() + else: + raise RuleParseException(self, f"Expected {char!r}") + + +QUOTATION_MARKS = {'"', "'"} + + +def parse_string_literal(line: Line) -> str: + escaped = False + result = [] + + quotation_mark = line.get() + if quotation_mark not in QUOTATION_MARKS: + # This should never happen as long as this function is only called from + # parse_string. + raise RuleParseException(line, "Invalid quotation mark") + line.advance() + + while c := line.get(): + if escaped: + result.append(c) + escaped = False + line.advance() + elif c == quotation_mark: + line.advance() + return "".join(result) + elif c == "\\": + escaped = True + line.advance() + else: + result.append(c) + line.advance() + + raise RuleParseException(line, "Expected end of string literal") + + +def parse_until_space_or_eol(line: Line) -> str: + result = [] + while c := line.get(): + if c == " ": + break + result.append(c) + line.advance() + + return "".join(result) + + +def parse_string(line: Line) -> str: + if line.get() in QUOTATION_MARKS: + return parse_string_literal(line) + else: + return parse_until_space_or_eol(line) + + +def parse_arrow(line: Line) -> str: + line.expect("-") + + name = [] + while True: + if c := line.get(): + if c == "-": + break + else: + name.append(c) + line.advance() + else: + raise RuleParseException(line, "Expected rest of arrow") + + line.expect("->") + return "".join(name) + + +def parse_rule(line: Line) -> Rule: + left = parse_string(line) + line.expect(" ") + arrowindex = line.index + arrowname = 
parse_arrow(line) + line.expect(" ") + right = parse_string(line) + + if arrowname == "": + return NormalRule(Path(left), Path(right)) + elif arrowname == "exact": + return ExactRule(Path(left), Path(right)) + elif arrowname == "re": + return ReRule(left, right) + else: + line.index = arrowindex + 1 # For nicer error message + raise RuleParseException(line, "Invalid arrow name") + + +class Transformer: + def __init__(self, rules: str): + """ + May throw a RuleParseException. + """ + + self._rules = [] + for i, line in enumerate(rules.split("\n")): + line = line.strip() + if line: + self._rules.append(parse_rule(Line(line, i))) + + def transform(self, path: Path) -> Optional[Path]: + for rule in self._rules: + if result := rule.transform(path): + return result + + return None From b915e393dd28a3fe317eb552f211e9c0c0738a35 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 10:24:28 +0200 Subject: [PATCH 064/524] Implement limiter --- PFERD/limiter.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 PFERD/limiter.py diff --git a/PFERD/limiter.py b/PFERD/limiter.py new file mode 100644 index 0000000..f73e2cd --- /dev/null +++ b/PFERD/limiter.py @@ -0,0 +1,19 @@ +import asyncio +from contextlib import AbstractAsyncContextManager, asynccontextmanager +from typing import AsyncIterator + + +class Limiter: + def __init__(self, limit: int = 10): + self._semaphore = asyncio.Semaphore(limit) + + @asynccontextmanager + async def _context_manager(self) -> AsyncIterator[None]: + await self._semaphore.acquire() + try: + yield + finally: + self._semaphore.release() + + def limit(self) -> AbstractAsyncContextManager[None]: + return self._context_manager() From a18db57e6fc0c92cb4dec2734bd393e05d6913d6 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 11:25:00 +0200 Subject: [PATCH 065/524] Implement terminal conductor --- PFERD/conductor.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++ setup.cfg | 2 ++ 2 files changed, 85 
insertions(+) create mode 100644 PFERD/conductor.py diff --git a/PFERD/conductor.py b/PFERD/conductor.py new file mode 100644 index 0000000..bf41f61 --- /dev/null +++ b/PFERD/conductor.py @@ -0,0 +1,83 @@ +import asyncio +from contextlib import (AbstractAsyncContextManager, AbstractContextManager, + asynccontextmanager, contextmanager) +from pathlib import Path +from typing import AsyncIterator, Iterator, List, Optional + +import rich +from rich.markup import escape +from rich.progress import Progress, TaskID + + +class ProgressBar: + def __init__(self, progress: Progress, taskid: TaskID): + self._progress = progress + self._taskid = taskid + + def advance(self, amount: float = 1) -> None: + self._progress.advance(self._taskid, advance=amount) + + +class TerminalConductor: + def __init__(self) -> None: + self._stopped = False + self._lock = asyncio.Lock() + self._progress = Progress() + self._lines: List[str] = [] + + def _start(self) -> None: + for line in self._lines: + rich.print(line) + self._lines = [] + + self._progress.start() + + def _stop(self) -> None: + self._progress.stop() + self._stopped = True + + async def start(self) -> None: + with self._lock: + self._start() + + async def stop(self) -> None: + with self._lock: + self._stop() + + def print(self, line: str) -> None: + if self._stopped: + self._lines.append(line) + else: + rich.print(line) + + @asynccontextmanager + async def _exclusive_output_cm(self) -> AsyncIterator[None]: + async with self._lock: + self.stop() + try: + yield + finally: + self.start() + + def exclusive_output(self) -> AbstractAsyncContextManager[None]: + return self._exclusive_output_cm() + + @contextmanager + def _progress_bar_cm( + self, + description: str, + steps: Optional[float], + ) -> Iterator[ProgressBar]: + taskid = self._progress.add_task(description, steps=steps) + bar = ProgressBar(self._progress, taskid) + try: + yield bar + finally: + self._progress.remove_task(taskid) + + def progress_bar( + self, + description: 
Path, + steps: Optional[float], + ) -> AbstractContextManager[ProgressBar]: + return self._progress_bar_cm(escape(str(description)), steps=steps) diff --git a/setup.cfg b/setup.cfg index db60477..1c6e764 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,6 +5,8 @@ version = 3.0.0 [options] packages = PFERD python_requires = >=3.8 +install_requires = + rich>=10.1.0 [options.entry_points] console_scripts = From 8da1ac6ceee7b37c0654193519d4ae780f4e9d72 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 11:25:13 +0200 Subject: [PATCH 066/524] Extend mypy config --- mypy.ini | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mypy.ini b/mypy.ini index 60306de..14509d6 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,7 +1,11 @@ [mypy] +disallow_any_generics = True disallow_untyped_defs = True disallow_incomplete_defs = True no_implicit_optional = True +warn_unused_ignores = True +warn_unreachable = True +show_error_context = True [mypy-rich.*,bs4,keyring] ignore_missing_imports = True From c4fb92c6585f90b6a99a884c8a4859ce46d7f884 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 13:11:58 +0200 Subject: [PATCH 067/524] Make type hints compatible with Python 3.8 --- PFERD/conductor.py | 11 ++++++----- PFERD/limiter.py | 7 ++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/PFERD/conductor.py b/PFERD/conductor.py index bf41f61..86df7e4 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -1,8 +1,9 @@ import asyncio -from contextlib import (AbstractAsyncContextManager, AbstractContextManager, - asynccontextmanager, contextmanager) +from contextlib import asynccontextmanager, contextmanager from pathlib import Path -from typing import AsyncIterator, Iterator, List, Optional +# TODO If we upgrade to python 3.9, these context manager hints are deprecated +from typing import (AsyncContextManager, AsyncIterator, ContextManager, + Iterator, List, Optional) import rich from rich.markup import escape @@ -59,7 +60,7 @@ class TerminalConductor: 
finally: self.start() - def exclusive_output(self) -> AbstractAsyncContextManager[None]: + def exclusive_output(self) -> AsyncContextManager[None]: return self._exclusive_output_cm() @contextmanager @@ -79,5 +80,5 @@ class TerminalConductor: self, description: Path, steps: Optional[float], - ) -> AbstractContextManager[ProgressBar]: + ) -> ContextManager[ProgressBar]: return self._progress_bar_cm(escape(str(description)), steps=steps) diff --git a/PFERD/limiter.py b/PFERD/limiter.py index f73e2cd..f001d8b 100644 --- a/PFERD/limiter.py +++ b/PFERD/limiter.py @@ -1,6 +1,7 @@ import asyncio -from contextlib import AbstractAsyncContextManager, asynccontextmanager -from typing import AsyncIterator +from contextlib import asynccontextmanager +# TODO If we upgrade to python 3.9, this context manager hint is deprecated +from typing import AsyncContextManager, AsyncIterator class Limiter: @@ -15,5 +16,5 @@ class Limiter: finally: self._semaphore.release() - def limit(self) -> AbstractAsyncContextManager[None]: + def limit(self) -> AsyncContextManager[None]: return self._context_manager() From 7e127cd5cce37b6c0f6cd8b0139ae9d13cb69e07 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 13:43:50 +0200 Subject: [PATCH 068/524] Clean up and fix conductor and limiter Turns out you have to await an async lock, who knew... 
--- PFERD/conductor.py | 33 ++++++++++++--------------------- PFERD/limiter.py | 8 ++------ 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/PFERD/conductor.py b/PFERD/conductor.py index 86df7e4..fef5a0e 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -1,12 +1,8 @@ import asyncio from contextlib import asynccontextmanager, contextmanager -from pathlib import Path -# TODO If we upgrade to python 3.9, these context manager hints are deprecated -from typing import (AsyncContextManager, AsyncIterator, ContextManager, - Iterator, List, Optional) +from typing import AsyncIterator, Iterator, List, Optional import rich -from rich.markup import escape from rich.progress import Progress, TaskID @@ -38,11 +34,11 @@ class TerminalConductor: self._stopped = True async def start(self) -> None: - with self._lock: + async with self._lock: self._start() async def stop(self) -> None: - with self._lock: + async with self._lock: self._stop() def print(self, line: str) -> None: @@ -52,7 +48,7 @@ class TerminalConductor: rich.print(line) @asynccontextmanager - async def _exclusive_output_cm(self) -> AsyncIterator[None]: + async def exclusive_output(self) -> AsyncIterator[None]: async with self._lock: self.stop() try: @@ -60,25 +56,20 @@ class TerminalConductor: finally: self.start() - def exclusive_output(self) -> AsyncContextManager[None]: - return self._exclusive_output_cm() - @contextmanager - def _progress_bar_cm( + def progress_bar( self, description: str, - steps: Optional[float], + total: Optional[float] = None, ) -> Iterator[ProgressBar]: - taskid = self._progress.add_task(description, steps=steps) + if total is None: + # Indeterminate progress bar + taskid = self._progress.add_task(description, start=False) + else: + taskid = self._progress.add_task(description, total=total) + bar = ProgressBar(self._progress, taskid) try: yield bar finally: self._progress.remove_task(taskid) - - def progress_bar( - self, - description: Path, - steps: Optional[float], 
- ) -> ContextManager[ProgressBar]: - return self._progress_bar_cm(escape(str(description)), steps=steps) diff --git a/PFERD/limiter.py b/PFERD/limiter.py index f001d8b..ff91d57 100644 --- a/PFERD/limiter.py +++ b/PFERD/limiter.py @@ -1,7 +1,6 @@ import asyncio from contextlib import asynccontextmanager -# TODO If we upgrade to python 3.9, this context manager hint is deprecated -from typing import AsyncContextManager, AsyncIterator +from typing import AsyncIterator class Limiter: @@ -9,12 +8,9 @@ class Limiter: self._semaphore = asyncio.Semaphore(limit) @asynccontextmanager - async def _context_manager(self) -> AsyncIterator[None]: + async def limit(self) -> AsyncIterator[None]: await self._semaphore.acquire() try: yield finally: self._semaphore.release() - - def limit(self) -> AsyncContextManager[None]: - return self._context_manager() From bbc792f9fb7de4459da1fdaa55f24ea292333981 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 13:44:29 +0200 Subject: [PATCH 069/524] Implement Crawler and DummyCrawler --- PFERD/crawler.py | 60 ++++++++++++++++++++++++++++++++++++++ PFERD/crawlers/__init__.py | 5 ++++ PFERD/crawlers/dummy.py | 53 +++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 PFERD/crawler.py create mode 100644 PFERD/crawlers/__init__.py create mode 100644 PFERD/crawlers/dummy.py diff --git a/PFERD/crawler.py b/PFERD/crawler.py new file mode 100644 index 0000000..9f1c7d9 --- /dev/null +++ b/PFERD/crawler.py @@ -0,0 +1,60 @@ +import configparser +from abc import ABC, abstractmethod +from contextlib import asynccontextmanager +from pathlib import Path +from typing import AsyncIterator, Optional + +from rich.markup import escape + +from .conductor import ProgressBar, TerminalConductor +from .limiter import Limiter +from .transformer import RuleParseException, Transformer + + +class CrawlerLoadException(Exception): + pass + + +class Crawler(ABC): + def __init__(self, name: str, section: configparser.SectionProxy) 
-> None: + """ + May throw a CrawlerLoadException. + """ + + self.name = name + + self._conductor = TerminalConductor() + self._limiter = Limiter() + + try: + self._transformer = Transformer(section.get("transform", "")) + except RuleParseException as e: + e.pretty_print() + raise CrawlerLoadException() + + # output_dir = Path(section.get("output_dir", name)) + + def print(self, text: str) -> None: + self._conductor.print(text) + + @asynccontextmanager + async def progress_bar( + self, + path: Path, + total: Optional[int] = None, + ) -> AsyncIterator[ProgressBar]: + desc = escape(str(path)) + async with self._limiter.limit(): + with self._conductor.progress_bar(desc, total=total) as bar: + yield bar + + async def run(self) -> None: + await self._conductor.start() + try: + await self.crawl() + finally: + await self._conductor.stop() + + @abstractmethod + async def crawl(self) -> None: + pass diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py new file mode 100644 index 0000000..5248a2d --- /dev/null +++ b/PFERD/crawlers/__init__.py @@ -0,0 +1,5 @@ +from .dummy import DummyCrawler + +CRAWLERS = { + "dummy": DummyCrawler, +} diff --git a/PFERD/crawlers/dummy.py b/PFERD/crawlers/dummy.py new file mode 100644 index 0000000..b4d787a --- /dev/null +++ b/PFERD/crawlers/dummy.py @@ -0,0 +1,53 @@ +import asyncio +import random +from pathlib import Path +from typing import Any + +from rich.markup import escape + +from ..crawler import Crawler + +DUMMY_TREE = { + "Blätter": { + "Blatt_01.pdf": (), + "Blatt_02.pdf": (), + "Blatt_03.pdf": (), + "Blatt_04.pdf": (), + "Blatt_05.pdf": (), + "Blatt_01_Lösung.pdf": (), + "Blatt_02_Lösung.pdf": (), + "Blatt_03_Lösung.pdf": (), + "Blatt_04_Lösung.pdf": (), + "Blatt_05_Lösung.pdf": (), + }, + "Vorlesungsfolien": { + "VL_01.pdf": (), + "VL_02.pdf": (), + "VL_03.pdf": (), + "VL_04.pdf": (), + "VL_05.pdf": (), + }, + "noch_mehr.txt": (), + "dateien.jar": (), +} + + +class DummyCrawler(Crawler): + async def crawl(self) -> 
None: + await self._crawl_entry(Path(), DUMMY_TREE) + + async def _crawl_entry(self, path: Path, value: Any) -> None: + if value == (): + n = random.randint(5, 20) + async with self.progress_bar(path, n) as bar: + await asyncio.sleep(random.random() / 2) + for i in range(n): + await asyncio.sleep(0.5) + bar.advance() + self.print(f"[green]Downloaded {escape(str(path))}") + else: + t = random.random() * 2 + 1 + async with self.progress_bar(path) as bar: + await asyncio.sleep(t) + tasks = [self._crawl_entry(path / k, v) for k, v in value.items()] + await asyncio.gather(*tasks) From 3ea86d18a0be532e53f62c54425c9bc9814b0ead Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 13:45:04 +0200 Subject: [PATCH 070/524] Jerry-rig DummyCrawler to run --- PFERD/__init__.py | 5 +- PFERD/pferd.py | 446 +--------------------------------------------- 2 files changed, 13 insertions(+), 438 deletions(-) diff --git a/PFERD/__init__.py b/PFERD/__init__.py index 7b3a3c1..a16b19b 100644 --- a/PFERD/__init__.py +++ b/PFERD/__init__.py @@ -1,7 +1,9 @@ import argparse +import asyncio from pathlib import Path from .config import Config, ConfigDumpException, ConfigLoadException +from .pferd import Pferd def main() -> None: @@ -37,4 +39,5 @@ def main() -> None: exit(1) exit() - print(config) + pferd = Pferd(config) + asyncio.run(pferd.run()) diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 1bb6f78..d145ade 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -1,440 +1,12 @@ -""" -Convenience functions for using PFERD. 
-""" +from .config import Config +from .crawlers import CRAWLERS -import logging -from pathlib import Path -from typing import Callable, List, Optional, Union -from .authenticators import UserPassAuthenticator -from .cookie_jar import CookieJar -from .diva import (DivaDownloader, DivaDownloadStrategy, DivaPlaylistCrawler, - diva_download_new) -from .download_summary import DownloadSummary -from .errors import FatalException, swallow_and_print_errors -from .ilias import (IliasAuthenticator, IliasCrawler, IliasDirectoryFilter, - IliasDownloader, IliasDownloadInfo, IliasDownloadStrategy, - KitShibbolethAuthenticator, download_modified_or_new) -from .ipd import (IpdCrawler, IpdDownloader, IpdDownloadInfo, - IpdDownloadStrategy, ipd_download_new_or_modified) -from .location import Location -from .logging import PrettyLogger, enable_logging -from .organizer import FileConflictResolver, Organizer, resolve_prompt_user -from .tmp_dir import TmpDir -from .transform import TF, Transform, apply_transform -from .utils import PathLike, to_path +class Pferd: + def __init__(self, config: Config): + self._config = config -# TODO save known-good cookies as soon as possible - - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - - -class Pferd(Location): - # pylint: disable=too-many-arguments - """ - The main entrypoint in your Pferd usage: This class combines a number of - useful shortcuts for running synchronizers in a single interface. - """ - - def __init__( - self, - base_dir: Path, - tmp_dir: Path = Path(".tmp"), - test_run: bool = False - ): - super().__init__(Path(base_dir)) - - self._download_summary = DownloadSummary() - self._tmp_dir = TmpDir(self.resolve(tmp_dir)) - self._test_run = test_run - - @staticmethod - def enable_logging() -> None: - """ - Enable and configure logging via the logging module. 
- """ - - enable_logging() - - @staticmethod - def _print_transformables(transformables: List[TF]) -> None: - LOGGER.info("") - LOGGER.info("Results of the test run:") - for transformable in transformables: - LOGGER.info(transformable.path) - - @staticmethod - def _get_authenticator( - username: Optional[str], password: Optional[str] - ) -> KitShibbolethAuthenticator: - inner_auth = UserPassAuthenticator("ILIAS - Pferd.py", username, password) - return KitShibbolethAuthenticator(inner_auth) - - def _ilias( - self, - target: PathLike, - base_url: str, - crawl_function: Callable[[IliasCrawler], List[IliasDownloadInfo]], - authenticator: IliasAuthenticator, - cookies: Optional[PathLike], - dir_filter: IliasDirectoryFilter, - transform: Transform, - download_strategy: IliasDownloadStrategy, - timeout: int, - clean: bool = True, - file_conflict_resolver: FileConflictResolver = resolve_prompt_user - ) -> Organizer: - # pylint: disable=too-many-locals - cookie_jar = CookieJar(to_path(cookies) if cookies else None) - session = cookie_jar.create_session() - tmp_dir = self._tmp_dir.new_subdir() - organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver) - - crawler = IliasCrawler(base_url, session, authenticator, dir_filter) - downloader = IliasDownloader(tmp_dir, organizer, session, - authenticator, download_strategy, timeout) - - cookie_jar.load_cookies() - info = crawl_function(crawler) - cookie_jar.save_cookies() - - transformed = apply_transform(transform, info) - if self._test_run: - self._print_transformables(transformed) - return organizer - - downloader.download_all(transformed) - cookie_jar.save_cookies() - - if clean: - organizer.cleanup() - - return organizer - - @swallow_and_print_errors - def ilias_kit( - self, - target: PathLike, - course_id: str, - dir_filter: IliasDirectoryFilter = lambda x, y: True, - transform: Transform = lambda x: x, - cookies: Optional[PathLike] = None, - username: Optional[str] = None, - password: Optional[str] = 
None, - download_strategy: IliasDownloadStrategy = download_modified_or_new, - clean: bool = True, - timeout: int = 5, - file_conflict_resolver: FileConflictResolver = resolve_prompt_user - ) -> Organizer: - """ - Synchronizes a folder with the ILIAS instance of the KIT. - - Arguments: - target {Path} -- the target path to write the data to - course_id {str} -- the id of the main course page (found in the URL after ref_id - when opening the course homepage) - - Keyword Arguments: - dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the - crawler level, these directories and all of their content is skipped. - (default: {lambdax:True}) - transform {Transform} -- A transformation function for the output paths. Return None - to ignore a file. (default: {lambdax:x}) - cookies {Optional[Path]} -- The path to store and load cookies from. - (default: {None}) - username {Optional[str]} -- The SCC username. If none is given, it will prompt - the user. (default: {None}) - password {Optional[str]} -- The SCC password. If none is given, it will prompt - the user. (default: {None}) - download_strategy {DownloadStrategy} -- A function to determine which files need to - be downloaded. Can save bandwidth and reduce the number of requests. - (default: {download_modified_or_new}) - clean {bool} -- Whether to clean up when the method finishes. - timeout {int} -- The download timeout for opencast videos. Sadly needed due to a - requests bug. - file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal - with overwriting or deleting files. The default always asks the user. - """ - # This authenticator only works with the KIT ilias instance. 
- authenticator = Pferd._get_authenticator(username=username, password=password) - PRETTY.starting_synchronizer(target, "ILIAS", course_id) - - organizer = self._ilias( - target=target, - base_url="https://ilias.studium.kit.edu/", - crawl_function=lambda crawler: crawler.crawl_course(course_id), - authenticator=authenticator, - cookies=cookies, - dir_filter=dir_filter, - transform=transform, - download_strategy=download_strategy, - clean=clean, - timeout=timeout, - file_conflict_resolver=file_conflict_resolver - ) - - self._download_summary.merge(organizer.download_summary) - - return organizer - - def print_summary(self) -> None: - """ - Prints the accumulated download summary. - """ - PRETTY.summary(self._download_summary) - - @swallow_and_print_errors - def ilias_kit_personal_desktop( - self, - target: PathLike, - dir_filter: IliasDirectoryFilter = lambda x, y: True, - transform: Transform = lambda x: x, - cookies: Optional[PathLike] = None, - username: Optional[str] = None, - password: Optional[str] = None, - download_strategy: IliasDownloadStrategy = download_modified_or_new, - clean: bool = True, - timeout: int = 5, - file_conflict_resolver: FileConflictResolver = resolve_prompt_user - ) -> Organizer: - """ - Synchronizes a folder with the ILIAS instance of the KIT. This method will crawl the ILIAS - "personal desktop" instead of a single course. - - Arguments: - target {Path} -- the target path to write the data to - - Keyword Arguments: - dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the - crawler level, these directories and all of their content is skipped. - (default: {lambdax:True}) - transform {Transform} -- A transformation function for the output paths. Return None - to ignore a file. (default: {lambdax:x}) - cookies {Optional[Path]} -- The path to store and load cookies from. - (default: {None}) - username {Optional[str]} -- The SCC username. If none is given, it will prompt - the user. 
(default: {None}) - password {Optional[str]} -- The SCC password. If none is given, it will prompt - the user. (default: {None}) - download_strategy {DownloadStrategy} -- A function to determine which files need to - be downloaded. Can save bandwidth and reduce the number of requests. - (default: {download_modified_or_new}) - clean {bool} -- Whether to clean up when the method finishes. - timeout {int} -- The download timeout for opencast videos. Sadly needed due to a - requests bug. - file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal - with overwriting or deleting files. The default always asks the user. - """ - # This authenticator only works with the KIT ilias instance. - authenticator = Pferd._get_authenticator(username, password) - PRETTY.starting_synchronizer(target, "ILIAS", "Personal Desktop") - - organizer = self._ilias( - target=target, - base_url="https://ilias.studium.kit.edu/", - crawl_function=lambda crawler: crawler.crawl_personal_desktop(), - authenticator=authenticator, - cookies=cookies, - dir_filter=dir_filter, - transform=transform, - download_strategy=download_strategy, - clean=clean, - timeout=timeout, - file_conflict_resolver=file_conflict_resolver - ) - - self._download_summary.merge(organizer.download_summary) - - return organizer - - @swallow_and_print_errors - def ilias_kit_folder( - self, - target: PathLike, - full_url: str, - dir_filter: IliasDirectoryFilter = lambda x, y: True, - transform: Transform = lambda x: x, - cookies: Optional[PathLike] = None, - username: Optional[str] = None, - password: Optional[str] = None, - download_strategy: IliasDownloadStrategy = download_modified_or_new, - clean: bool = True, - timeout: int = 5, - file_conflict_resolver: FileConflictResolver = resolve_prompt_user - ) -> Organizer: - """ - Synchronizes a folder with a given folder on the ILIAS instance of the KIT. 
- - Arguments: - target {Path} -- the target path to write the data to - full_url {str} -- the full url of the folder/videos/course to crawl - - Keyword Arguments: - dir_filter {IliasDirectoryFilter} -- A filter for directories. Will be applied on the - crawler level, these directories and all of their content is skipped. - (default: {lambdax:True}) - transform {Transform} -- A transformation function for the output paths. Return None - to ignore a file. (default: {lambdax:x}) - cookies {Optional[Path]} -- The path to store and load cookies from. - (default: {None}) - username {Optional[str]} -- The SCC username. If none is given, it will prompt - the user. (default: {None}) - password {Optional[str]} -- The SCC password. If none is given, it will prompt - the user. (default: {None}) - download_strategy {DownloadStrategy} -- A function to determine which files need to - be downloaded. Can save bandwidth and reduce the number of requests. - (default: {download_modified_or_new}) - clean {bool} -- Whether to clean up when the method finishes. - timeout {int} -- The download timeout for opencast videos. Sadly needed due to a - requests bug. - file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal - with overwriting or deleting files. The default always asks the user. - """ - # This authenticator only works with the KIT ilias instance. 
- authenticator = Pferd._get_authenticator(username=username, password=password) - PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url") - - if not full_url.startswith("https://ilias.studium.kit.edu"): - raise FatalException("Not a valid KIT ILIAS URL") - - organizer = self._ilias( - target=target, - base_url="https://ilias.studium.kit.edu/", - crawl_function=lambda crawler: crawler.recursive_crawl_url(full_url), - authenticator=authenticator, - cookies=cookies, - dir_filter=dir_filter, - transform=transform, - download_strategy=download_strategy, - clean=clean, - timeout=timeout, - file_conflict_resolver=file_conflict_resolver - ) - - self._download_summary.merge(organizer.download_summary) - - return organizer - - @swallow_and_print_errors - def ipd_kit( - self, - target: Union[PathLike, Organizer], - url: str, - transform: Transform = lambda x: x, - download_strategy: IpdDownloadStrategy = ipd_download_new_or_modified, - clean: bool = True, - file_conflict_resolver: FileConflictResolver = resolve_prompt_user - ) -> Organizer: - """ - Synchronizes a folder with a DIVA playlist. - - Arguments: - target {Union[PathLike, Organizer]} -- The organizer / target folder to use. - url {str} -- the url to the page - - Keyword Arguments: - transform {Transform} -- A transformation function for the output paths. Return None - to ignore a file. (default: {lambdax:x}) - download_strategy {DivaDownloadStrategy} -- A function to determine which files need to - be downloaded. Can save bandwidth and reduce the number of requests. - (default: {diva_download_new}) - clean {bool} -- Whether to clean up when the method finishes. - file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal - with overwriting or deleting files. The default always asks the user. 
- """ - tmp_dir = self._tmp_dir.new_subdir() - - if target is None: - PRETTY.starting_synchronizer("None", "IPD", url) - raise FatalException("Got 'None' as target directory, aborting") - - if isinstance(target, Organizer): - organizer = target - else: - organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver) - - PRETTY.starting_synchronizer(organizer.path, "IPD", url) - - elements: List[IpdDownloadInfo] = IpdCrawler(url).crawl() - transformed = apply_transform(transform, elements) - - if self._test_run: - self._print_transformables(transformed) - return organizer - - downloader = IpdDownloader(tmp_dir=tmp_dir, organizer=organizer, strategy=download_strategy) - downloader.download_all(transformed) - - if clean: - organizer.cleanup() - - self._download_summary.merge(organizer.download_summary) - - return organizer - - @swallow_and_print_errors - def diva_kit( - self, - target: Union[PathLike, Organizer], - playlist_location: str, - transform: Transform = lambda x: x, - download_strategy: DivaDownloadStrategy = diva_download_new, - clean: bool = True, - file_conflict_resolver: FileConflictResolver = resolve_prompt_user - ) -> Organizer: - """ - Synchronizes a folder with a DIVA playlist. - - Arguments: - organizer {Organizer} -- The organizer to use. - playlist_location {str} -- the playlist id or the playlist URL - in the format 'https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271' - - Keyword Arguments: - transform {Transform} -- A transformation function for the output paths. Return None - to ignore a file. (default: {lambdax:x}) - download_strategy {DivaDownloadStrategy} -- A function to determine which files need to - be downloaded. Can save bandwidth and reduce the number of requests. - (default: {diva_download_new}) - clean {bool} -- Whether to clean up when the method finishes. - file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal - with overwriting or deleting files. 
The default always asks the user. - """ - tmp_dir = self._tmp_dir.new_subdir() - - if playlist_location.startswith("http"): - playlist_id = DivaPlaylistCrawler.fetch_id(playlist_link=playlist_location) - else: - playlist_id = playlist_location - - if target is None: - PRETTY.starting_synchronizer("None", "DIVA", playlist_id) - raise FatalException("Got 'None' as target directory, aborting") - - if isinstance(target, Organizer): - organizer = target - else: - organizer = Organizer(self.resolve(to_path(target)), file_conflict_resolver) - - PRETTY.starting_synchronizer(organizer.path, "DIVA", playlist_id) - - crawler = DivaPlaylistCrawler(playlist_id) - downloader = DivaDownloader(tmp_dir, organizer, download_strategy) - - info = crawler.crawl() - - transformed = apply_transform(transform, info) - if self._test_run: - self._print_transformables(transformed) - return organizer - - downloader.download_all(transformed) - - if clean: - organizer.cleanup() - - self._download_summary.merge(organizer.download_summary) - - return organizer + async def run(self) -> None: + print("Bleep bloop 1") + await CRAWLERS["dummy"]("dummy", self._config._parser["dummy"]).run() + print("Bleep bloop 2") From ac3bfd7388af31c8feab2c3835a382f23c498034 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 13:53:16 +0200 Subject: [PATCH 071/524] Make progress bars easier to use The crawler now supports two types of progress bars --- PFERD/crawler.py | 20 +++++++++++++++++--- PFERD/crawlers/dummy.py | 16 +++++++++------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 9f1c7d9..0092744 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -2,7 +2,8 @@ import configparser from abc import ABC, abstractmethod from contextlib import asynccontextmanager from pathlib import Path -from typing import AsyncIterator, Optional +# TODO In Python 3.9 and above, AsyncContextManager is deprecated +from typing import AsyncContextManager, 
AsyncIterator, Optional from rich.markup import escape @@ -40,14 +41,27 @@ class Crawler(ABC): @asynccontextmanager async def progress_bar( self, - path: Path, + desc: str, total: Optional[int] = None, ) -> AsyncIterator[ProgressBar]: - desc = escape(str(path)) async with self._limiter.limit(): with self._conductor.progress_bar(desc, total=total) as bar: yield bar + def crawl_bar(self, path: Path) -> AsyncContextManager[ProgressBar]: + path = escape(str(path)) + desc = f"[bold magenta]Crawling[/bold magenta] {path}" + return self.progress_bar(desc) + + def download_bar( + self, + path: Path, + size: int, + ) -> AsyncContextManager[ProgressBar]: + path = escape(str(path)) + desc = f"[bold green]Downloading[/bold green] {path}" + return self.progress_bar(desc, total=size) + async def run(self) -> None: await self._conductor.start() try: diff --git a/PFERD/crawlers/dummy.py b/PFERD/crawlers/dummy.py index b4d787a..a88216b 100644 --- a/PFERD/crawlers/dummy.py +++ b/PFERD/crawlers/dummy.py @@ -14,11 +14,13 @@ DUMMY_TREE = { "Blatt_03.pdf": (), "Blatt_04.pdf": (), "Blatt_05.pdf": (), - "Blatt_01_Lösung.pdf": (), - "Blatt_02_Lösung.pdf": (), - "Blatt_03_Lösung.pdf": (), - "Blatt_04_Lösung.pdf": (), - "Blatt_05_Lösung.pdf": (), + "Lösungen": { + "Blatt_01_Lösung.pdf": (), + "Blatt_02_Lösung.pdf": (), + "Blatt_03_Lösung.pdf": (), + "Blatt_04_Lösung.pdf": (), + "Blatt_05_Lösung.pdf": (), + }, }, "Vorlesungsfolien": { "VL_01.pdf": (), @@ -39,7 +41,7 @@ class DummyCrawler(Crawler): async def _crawl_entry(self, path: Path, value: Any) -> None: if value == (): n = random.randint(5, 20) - async with self.progress_bar(path, n) as bar: + async with self.download_bar(path, n) as bar: await asyncio.sleep(random.random() / 2) for i in range(n): await asyncio.sleep(0.5) @@ -47,7 +49,7 @@ class DummyCrawler(Crawler): self.print(f"[green]Downloaded {escape(str(path))}") else: t = random.random() * 2 + 1 - async with self.progress_bar(path) as bar: + async with self.crawl_bar(path) as 
bar: await asyncio.sleep(t) tasks = [self._crawl_entry(path / k, v) for k, v in value.items()] await asyncio.gather(*tasks) From 6431a3fb3db070ddbe5a1ce286e9b375f72b82ad Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 14:23:09 +0200 Subject: [PATCH 072/524] Fix some mypy errors --- PFERD/crawler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 0092744..31aab5b 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -49,8 +49,8 @@ class Crawler(ABC): yield bar def crawl_bar(self, path: Path) -> AsyncContextManager[ProgressBar]: - path = escape(str(path)) - desc = f"[bold magenta]Crawling[/bold magenta] {path}" + pathstr = escape(str(path)) + desc = f"[bold magenta]Crawling[/bold magenta] {pathstr}" return self.progress_bar(desc) def download_bar( @@ -58,8 +58,8 @@ class Crawler(ABC): path: Path, size: int, ) -> AsyncContextManager[ProgressBar]: - path = escape(str(path)) - desc = f"[bold green]Downloading[/bold green] {path}" + pathstr = escape(str(path)) + desc = f"[bold green]Downloading[/bold green] {pathstr}" return self.progress_bar(desc, total=size) async def run(self) -> None: From 2e85d26b6bbb6a392e4123080f3cb9f74a40f0d7 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 14:23:28 +0200 Subject: [PATCH 073/524] Use conductor via context manager --- PFERD/conductor.py | 43 +++++++++++++++++++++++++------------------ PFERD/crawler.py | 5 +---- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/PFERD/conductor.py b/PFERD/conductor.py index fef5a0e..121ed9a 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -1,6 +1,7 @@ import asyncio from contextlib import asynccontextmanager, contextmanager -from typing import AsyncIterator, Iterator, List, Optional +from types import TracebackType +from typing import AsyncIterator, Iterator, List, Optional, Type import rich from rich.progress import Progress, TaskID @@ -22,24 +23,30 @@ class TerminalConductor: 
self._progress = Progress() self._lines: List[str] = [] - def _start(self) -> None: - for line in self._lines: - rich.print(line) - self._lines = [] - - self._progress.start() - - def _stop(self) -> None: - self._progress.stop() - self._stopped = True - - async def start(self) -> None: + async def _start(self) -> None: async with self._lock: - self._start() + for line in self._lines: + rich.print(line) + self._lines = [] - async def stop(self) -> None: + self._progress.start() + + async def _stop(self) -> None: async with self._lock: - self._stop() + self._progress.stop() + self._stopped = True + + async def __aenter__(self) -> None: + await self._start() + + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> Optional[bool]: + await self._stop() + return None def print(self, line: str) -> None: if self._stopped: @@ -50,11 +57,11 @@ class TerminalConductor: @asynccontextmanager async def exclusive_output(self) -> AsyncIterator[None]: async with self._lock: - self.stop() + self._stop() try: yield finally: - self.start() + self._start() @contextmanager def progress_bar( diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 31aab5b..093ba91 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -63,11 +63,8 @@ class Crawler(ABC): return self.progress_bar(desc, total=size) async def run(self) -> None: - await self._conductor.start() - try: + async with self._conductor: await self.crawl() - finally: - await self._conductor.stop() @abstractmethod async def crawl(self) -> None: From d96a361325ed1c16bbfd9af725484a6470d5de49 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 15:26:10 +0200 Subject: [PATCH 074/524] Test and fix exclusive output --- PFERD/conductor.py | 34 ++++++++++++++++++++-------------- PFERD/crawler.py | 3 +++ PFERD/crawlers/dummy.py | 8 ++++++-- PFERD/utils.py | 29 ++++++++++++++++++++++++++--- 4 files changed, 55 insertions(+), 19 
deletions(-) diff --git a/PFERD/conductor.py b/PFERD/conductor.py index 121ed9a..161a287 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -3,7 +3,6 @@ from contextlib import asynccontextmanager, contextmanager from types import TracebackType from typing import AsyncIterator, Iterator, List, Optional, Type -import rich from rich.progress import Progress, TaskID @@ -24,20 +23,26 @@ class TerminalConductor: self._lines: List[str] = [] async def _start(self) -> None: - async with self._lock: - for line in self._lines: - rich.print(line) - self._lines = [] + for task in self._progress.tasks: + task.visible = True + self._progress.start() - self._progress.start() + self._stopped = False + + for line in self._lines: + self.print(line) + self._lines = [] async def _stop(self) -> None: - async with self._lock: - self._progress.stop() - self._stopped = True + self._stopped = True + + for task in self._progress.tasks: + task.visible = False + self._progress.stop() async def __aenter__(self) -> None: - await self._start() + async with self._lock: + await self._start() async def __aexit__( self, @@ -45,23 +50,24 @@ class TerminalConductor: exc_value: Optional[BaseException], traceback: Optional[TracebackType], ) -> Optional[bool]: - await self._stop() + async with self._lock: + await self._stop() return None def print(self, line: str) -> None: if self._stopped: self._lines.append(line) else: - rich.print(line) + self._progress.console.print(line) @asynccontextmanager async def exclusive_output(self) -> AsyncIterator[None]: async with self._lock: - self._stop() + await self._stop() try: yield finally: - self._start() + await self._start() @contextmanager def progress_bar( diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 093ba91..6326b90 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -38,6 +38,9 @@ class Crawler(ABC): def print(self, text: str) -> None: self._conductor.print(text) + def exclusive_output(self): + return self._conductor.exclusive_output() 
+ @asynccontextmanager async def progress_bar( self, diff --git a/PFERD/crawlers/dummy.py b/PFERD/crawlers/dummy.py index a88216b..46a7a69 100644 --- a/PFERD/crawlers/dummy.py +++ b/PFERD/crawlers/dummy.py @@ -6,6 +6,7 @@ from typing import Any from rich.markup import escape from ..crawler import Crawler +from ..utils import ainput DUMMY_TREE = { "Blätter": { @@ -17,7 +18,7 @@ DUMMY_TREE = { "Lösungen": { "Blatt_01_Lösung.pdf": (), "Blatt_02_Lösung.pdf": (), - "Blatt_03_Lösung.pdf": (), + "Blatt_03_Lösung.pdf": True, "Blatt_04_Lösung.pdf": (), "Blatt_05_Lösung.pdf": (), }, @@ -39,7 +40,10 @@ class DummyCrawler(Crawler): await self._crawl_entry(Path(), DUMMY_TREE) async def _crawl_entry(self, path: Path, value: Any) -> None: - if value == (): + if value is True: + async with self.exclusive_output(): + await ainput(f"File {path}, please press enter: ") + if value == () or value is True: n = random.randint(5, 20) async with self.download_bar(path, n) as bar: await asyncio.sleep(random.random() / 2) diff --git a/PFERD/utils.py b/PFERD/utils.py index 4e1b5d7..3808f1d 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -1,7 +1,30 @@ -from typing import Optional +import functools +import contextvars +import asyncio +import getpass +from typing import Any, Callable, Optional, TypeVar + +T = TypeVar("T") -def prompt_yes_no(query: str, default: Optional[bool]) -> bool: +# TODO When switching to 3.9, use asyncio.to_thread instead of this +async def to_thread(func: Callable[..., T], *args: Any, **kwargs: Any) -> T: + # https://github.com/python/cpython/blob/8d47f92d46a92a5931b8f3dcb4a484df672fc4de/Lib/asyncio/threads.py + loop = asyncio.get_event_loop() + ctx = contextvars.copy_context() + func_call = functools.partial(ctx.run, func, *args, **kwargs) + return await loop.run_in_executor(None, func_call) + + +async def ainput(prompt: Optional[str] = None) -> str: + return await to_thread(lambda: input(prompt)) + + +async def agetpass(prompt: Optional[str] = None) -> str: + 
return await to_thread(lambda: getpass.getpass(prompt)) + + +async def prompt_yes_no(query: str, default: Optional[bool]) -> bool: """ Asks the user a yes/no question and returns their choice. """ @@ -14,7 +37,7 @@ def prompt_yes_no(query: str, default: Optional[bool]) -> bool: query += " [y/n] " while True: - response = input(query).strip().lower() + response = (await ainput(query)).strip().lower() if response == "y": return True elif response == "n": From d2103d7c44f6d342cd9b6a1829a4da3f1adaf240 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 15:43:20 +0200 Subject: [PATCH 075/524] Document crawler --- PFERD/crawler.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 6326b90..36c528d 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -19,6 +19,11 @@ class CrawlerLoadException(Exception): class Crawler(ABC): def __init__(self, name: str, section: configparser.SectionProxy) -> None: """ + Initialize a crawler from its name and its section in the config file. + + If you are writing your own constructor for your own crawler, make sure + to call this constructor first (via super().__init__). + May throw a CrawlerLoadException. """ @@ -36,9 +41,28 @@ class Crawler(ABC): # output_dir = Path(section.get("output_dir", name)) def print(self, text: str) -> None: + """ + Print rich markup to the terminal. Crawlers *must* use this function to + print things unless they are holding an exclusive output context + manager! Be careful to escape all user-supplied strings. + """ + self._conductor.print(text) def exclusive_output(self): + """ + Acquire exclusive rights™ to the terminal output. While this context + manager is held, output such as printing and progress bars from other + threads is suspended and the current thread may do whatever it wants + with the terminal. However, it must return the terminal to its original + state before exiting the context manager. 
+ + No two threads can hold this context manager at the same time. + + Useful for password or confirmation prompts as well as running other + programs while crawling (e. g. to get certain credentials). + """ + return self._conductor.exclusive_output() @asynccontextmanager @@ -66,9 +90,21 @@ class Crawler(ABC): return self.progress_bar(desc, total=size) async def run(self) -> None: + """ + Start the crawling process. Call this function if you want to use a + crawler. + """ + async with self._conductor: await self.crawl() @abstractmethod async def crawl(self) -> None: + """ + Overwrite this function if you are writing a crawler. + + This function must not return before all crawling is complete. To crawl + multiple things concurrently, asyncio.gather can be used. + """ + pass From 502654d8535ed4be4bd93f978232117dd5e210fd Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 15:47:52 +0200 Subject: [PATCH 076/524] Fix mypy errors --- PFERD/crawler.py | 2 +- PFERD/utils.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 36c528d..376cada 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -49,7 +49,7 @@ class Crawler(ABC): self._conductor.print(text) - def exclusive_output(self): + def exclusive_output(self) -> AsyncContextManager[None]: """ Acquire exclusive rights™ to the terminal output. 
While this context manager is held, output such as printing and progress bars from other diff --git a/PFERD/utils.py b/PFERD/utils.py index 3808f1d..08017aa 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -1,6 +1,6 @@ -import functools -import contextvars import asyncio +import contextvars +import functools import getpass from typing import Any, Callable, Optional, TypeVar @@ -13,14 +13,14 @@ async def to_thread(func: Callable[..., T], *args: Any, **kwargs: Any) -> T: loop = asyncio.get_event_loop() ctx = contextvars.copy_context() func_call = functools.partial(ctx.run, func, *args, **kwargs) - return await loop.run_in_executor(None, func_call) + return await loop.run_in_executor(None, func_call) # type: ignore -async def ainput(prompt: Optional[str] = None) -> str: +async def ainput(prompt: str) -> str: return await to_thread(lambda: input(prompt)) -async def agetpass(prompt: Optional[str] = None) -> str: +async def agetpass(prompt: str) -> str: return await to_thread(lambda: getpass.getpass(prompt)) From 20a24dbcbf747fd82a6e8202d03429534504c799 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 16:14:50 +0200 Subject: [PATCH 077/524] Add changelog --- CHANGELOG.md | 20 ++++++++++++++++++++ README.md | 1 + 2 files changed, 21 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..14966d7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,20 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +### Added +- Support for concurrent downloads +- Support for proper config files +- This changelog + +### Changed +- Rewrote almost everything +- Redesigned CLI + +### Removed +- Backwards compatibility with 2.x diff --git a/README.md b/README.md index 5b74de5..9f82f4f 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ Other resources: +- [Changelog](CHANGELOG.md) - [Development Guide](DEV.md) ## Installation with pip From 0096d83387a75d7367e3fc42cfb4a58a1c5191f4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 16:37:42 +0200 Subject: [PATCH 078/524] Simplify Limiter implementation --- PFERD/limiter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/PFERD/limiter.py b/PFERD/limiter.py index ff91d57..ae72fe6 100644 --- a/PFERD/limiter.py +++ b/PFERD/limiter.py @@ -9,8 +9,5 @@ class Limiter: @asynccontextmanager async def limit(self) -> AsyncIterator[None]: - await self._semaphore.acquire() - try: + async with self._semaphore: yield - finally: - self._semaphore.release() From f776186480bd4f1955edbcc54365379ee0478e00 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 16:52:00 +0200 Subject: [PATCH 079/524] Use PurePath instead of Path Path should only be used when we need to access the file system. For all other purposes (mainly crawling), we use PurePath instead since the paths don't correspond to paths in the local file system. 
--- PFERD/crawler.py | 9 +++++---- PFERD/crawlers/dummy.py | 6 +++--- PFERD/transformer.py | 26 +++++++++++++------------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 376cada..9ceca20 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -1,7 +1,7 @@ import configparser from abc import ABC, abstractmethod from contextlib import asynccontextmanager -from pathlib import Path +from pathlib import PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import AsyncContextManager, AsyncIterator, Optional @@ -38,7 +38,8 @@ class Crawler(ABC): e.pretty_print() raise CrawlerLoadException() - # output_dir = Path(section.get("output_dir", name)) + # working_dir = Path(section.get("working_dir", "")) + # output_dir = working_dir / section.get("output_dir", name) def print(self, text: str) -> None: """ @@ -75,14 +76,14 @@ class Crawler(ABC): with self._conductor.progress_bar(desc, total=total) as bar: yield bar - def crawl_bar(self, path: Path) -> AsyncContextManager[ProgressBar]: + def crawl_bar(self, path: PurePath) -> AsyncContextManager[ProgressBar]: pathstr = escape(str(path)) desc = f"[bold magenta]Crawling[/bold magenta] {pathstr}" return self.progress_bar(desc) def download_bar( self, - path: Path, + path: PurePath, size: int, ) -> AsyncContextManager[ProgressBar]: pathstr = escape(str(path)) diff --git a/PFERD/crawlers/dummy.py b/PFERD/crawlers/dummy.py index 46a7a69..204b4b1 100644 --- a/PFERD/crawlers/dummy.py +++ b/PFERD/crawlers/dummy.py @@ -1,6 +1,6 @@ import asyncio import random -from pathlib import Path +from pathlib import PurePath from typing import Any from rich.markup import escape @@ -37,9 +37,9 @@ DUMMY_TREE = { class DummyCrawler(Crawler): async def crawl(self) -> None: - await self._crawl_entry(Path(), DUMMY_TREE) + await self._crawl_entry(PurePath(), DUMMY_TREE) - async def _crawl_entry(self, path: Path, value: Any) -> None: + async def _crawl_entry(self, 
path: PurePath, value: Any) -> None: if value is True: async with self.exclusive_output(): await ainput(f"File {path}, please press enter: ") diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 1ecaf19..298c580 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -1,22 +1,22 @@ import re from abc import ABC, abstractmethod from dataclasses import dataclass -from pathlib import Path +from pathlib import PurePath from typing import Dict, Optional, Union class Rule(ABC): @abstractmethod - def transform(self, path: Path) -> Optional[Path]: + def transform(self, path: PurePath) -> Optional[PurePath]: pass class NormalRule(Rule): - def __init__(self, left: Path, right: Path): + def __init__(self, left: PurePath, right: PurePath): self._left = left self._right = right - def _match_prefix(self, path: Path) -> Optional[Path]: + def _match_prefix(self, path: PurePath) -> Optional[PurePath]: left_parts = list(reversed(self._left.parts)) path_parts = list(reversed(path.parts)) @@ -33,9 +33,9 @@ class NormalRule(Rule): if left_parts: return None - return Path(*path_parts) + return PurePath(*path_parts) - def transform(self, path: Path) -> Optional[Path]: + def transform(self, path: PurePath) -> Optional[PurePath]: if rest := self._match_prefix(path): return self._right / rest @@ -43,11 +43,11 @@ class NormalRule(Rule): class ExactRule(Rule): - def __init__(self, left: Path, right: Path): + def __init__(self, left: PurePath, right: PurePath): self._left = left self._right = right - def transform(self, path: Path) -> Optional[Path]: + def transform(self, path: PurePath) -> Optional[PurePath]: if path == self._left: return self._right @@ -59,7 +59,7 @@ class ReRule(Rule): self._left = left self._right = right - def transform(self, path: Path) -> Optional[Path]: + def transform(self, path: PurePath) -> Optional[PurePath]: if match := re.fullmatch(self._left, str(path)): kwargs: Dict[str, Union[int, float]] = {} @@ -75,7 +75,7 @@ class ReRule(Rule): except 
ValueError: pass - return Path(self._right.format(*groups, **kwargs)) + return PurePath(self._right.format(*groups, **kwargs)) return None @@ -208,9 +208,9 @@ def parse_rule(line: Line) -> Rule: right = parse_string(line) if arrowname == "": - return NormalRule(Path(left), Path(right)) + return NormalRule(PurePath(left), PurePath(right)) elif arrowname == "exact": - return ExactRule(Path(left), Path(right)) + return ExactRule(PurePath(left), PurePath(right)) elif arrowname == "re": return ReRule(left, right) else: @@ -230,7 +230,7 @@ class Transformer: if line: self._rules.append(parse_rule(Line(line, i))) - def transform(self, path: Path) -> Optional[Path]: + def transform(self, path: PurePath) -> Optional[PurePath]: for rule in self._rules: if result := rule.transform(path): return result From 9ec19be11345e816f243aaf8514ce1be7a5c07cc Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 18:55:08 +0200 Subject: [PATCH 080/524] Document config file format --- CONFIG.md | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 1 + 2 files changed, 139 insertions(+) create mode 100644 CONFIG.md diff --git a/CONFIG.md b/CONFIG.md new file mode 100644 index 0000000..8acb97c --- /dev/null +++ b/CONFIG.md @@ -0,0 +1,138 @@ +# Config file format + +A config file consists of sections. A section begins with a `[section]` header, +which is followed by a list of `key = value` or `key: value` pairs. Comments +must be on their own line and start with `#` or `;`. Multiline values must be +indented beyond their key. For more details and some examples on the format, see +the [configparser documentation][1] ([basic interpolation][2] is enabled). + +[1]: "Supported INI File Structure" +[2]: "BasicInterpolation" + +## The `DEFAULT` section + +This section contains global configuration values. It can also be used to set +default values for the other sections. + +- `working_dir`: The directory PFERD operates in. 
Set to an absolute path to + make PFERD operate the same regardless of where it is executed. All other + paths in the config file are interpreted relative to this path. If this path + is relative, it is interpreted relative to the script's working dir. `~` is + expanded to the current user's home directory. (Default: `.`) + +## The `crawl:*` sections + +Sections whose names start with `crawl:` are used to configure crawlers. The +rest of the section name specifies the name of the crawler. + +A crawler synchronizes a remote resource to a local directory. There are +different types of crawlers for different kinds of resources, e. g. ILIAS +courses or lecture websites. + +Each crawl section represents an instance of a specific type of crawler. The +`type` option is used to specify the crawler type. The crawler's name is usually +used as the name for the output directory. New crawlers can be created simply by +adding a new crawl section to the config file. + +Depending on a crawler's type, it may have different options. For more details, +see the type's documentation below. The following options are common to all +crawlers: + +- `type`: The types are specified in [this section](#crawler-types). +- `output_dir`: The directory the crawler synchronizes files to. A crawler will + never place any files outside of this directory. (Default: crawler's name) +- `transform`: Rules for renaming and excluding certain files and directories. + For more details, see [this section](#transformation-rules). (Default: empty) + +## The `auth:*` sections + +Sections whose names start with `auth:` are used to configure authenticators. An +authenticator provides login credentials to one or more crawlers. + +Authenticators work similar to crawlers: A section represents an authenticator +instance, whose name is the rest of the section name. The type is specified by +the `type` option. + +Depending on an authenticator's type, it may have different options. 
For more +details, see the type's documentation below. The only option common to all +authenticators is `type`: + +- `type`: The types are specified in [this section](#authenticator-types). + +## Crawler types + +TODO Fill in as crawlers are implemented + +## Authenticator types + +TODO Fill in as authenticators are implemented + +## Transformation rules + +Transformation rules are rules for renaming and excluding files and directories. +They are specified line-by-line in a crawler's `transform` option. When a +crawler needs to apply a rule to a path, it goes through this list top-to-bottom +and choose the first matching rule. + +Each line has the format `SOURCE ARROW TARGET` where `TARGET` is optional. +`SOURCE` is either a normal path without spaces (e. g. `foo/bar`), or a string +literal delimited by `"` or `'` (e. g. `"foo\" bar/baz"`). Python's string +escape syntax is supported. Trailing slashes are ignored. `TARGET` can be +formatted like `SOURCE`, but it can also be a single exclamation mark without +quotes (`!`). `ARROW` is one of `-->`, `-exact->` and `-re->`. + +If a rule's target is `!`, this means that when the rule matches on a path, the +corresponding file or directory is ignored. If a rule's target is missing, the +path is matched but not modified. + +### The `-->` arrow + +The `-->` arrow is a basic renaming operation. If a path begins with `SOURCE`, +that part of the path is replaced with `TARGET`. This means that the rule +`foo/bar --> baz` would convert `foo/bar` into `baz`, but also `foo/bar/xyz` +into `baz/xyz`. The rule `foo --> !` would ignore a directory named `foo` as +well as all its contents. + +### The `-exact->` arrow + +The `-exact->` arrow requires the path to match `SOURCE` exactly. This means +that the rule `foo/bar -exact-> baz` would still convert `foo/bar` into `baz`, +but `foo/bar/xyz` would be unaffected. Also, `foo -exact-> !` would only ignore +`foo`, but not its contents (if it has any). 
The examples below show why this is +useful. + +### The `-re->` arrow + +The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression +that must match the entire path. If this is the case, then the capturing groups +are available in `TARGET` for formatting. + +### Example: Tutorials + +You have ILIAS course with lots of tutorials, but are only interested in a +single one? + +``` +tutorials/ + |- tut_01/ + |- tut_02/ + |- tut_03/ + ... +``` + +You can use a mix of normal and exact arrows to get rid of the other ones and +move the `tutorials/tut_02/` folder to `my_tut/`: + +``` +tutorials/tut_02 --> my_tut +tutorials -exact-> +tutorials --> ! +``` + +The second rule is required for many crawlers since they use the rules to decide +which directories to crawl. If it was missing when the crawler looks at +`tutorials/`, the third rule would match. This means the crawler would not crawl +the `tutorials/` directory and thus not discover that `tutorials/tut02/` +existed. + +Since the second rule is only relevant for crawling, the `TARGET` is left out. diff --git a/README.md b/README.md index 9f82f4f..f9d718e 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ Other resources: +- [Config file format](CONFIG.md) - [Changelog](CHANGELOG.md) - [Development Guide](DEV.md) From e7a51decb098a30019a1393b723956c99a85ef85 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 29 Apr 2021 20:13:46 +0200 Subject: [PATCH 081/524] Elaborate on transforms and implement changes --- CONFIG.md | 36 ++++++++++++++ PFERD/transformer.py | 111 ++++++++++++++++++++++++++++++++----------- 2 files changed, 120 insertions(+), 27 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 8acb97c..05f3363 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -107,6 +107,21 @@ The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression that must match the entire path. If this is the case, then the capturing groups are available in `TARGET` for formatting. 
+`TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can +be referred to as `{g}` (e. g. `{g3}`). `{g0}` refers to the original path. +If capturing group *n*'s contents are a valid integer, the integer value is +available as `{i}` (e. g. `{i3}`). If capturing group *n*'s contents are a +valid float, the float value is available as `{f}` (e. g. `{f3}`). + +Python's format string syntax has rich options for formatting its arguments. For +example, to left-pad the capturing group 3 with the digit `0` to width 5, you +can use `{i3:05}`. + +PFERD even allows you to write entire expressions inside the curly braces, for +example `{g2.lower()}` or `{g3.replace(' ', '_')}`. + +[3]: "Format String Syntax" + ### Example: Tutorials You have ILIAS course with lots of tutorials, but are only interested in a @@ -136,3 +151,24 @@ the `tutorials/` directory and thus not discover that `tutorials/tut02/` existed. Since the second rule is only relevant for crawling, the `TARGET` is left out. + +### Example: Lecture slides + +You have a course with slides like `Lecture 3: Linear functions.PDF` and you +would like to rename them to `03_linear_functions.pdf`. + +``` +Lectures/ + |- Lecture 1: Introduction.PDF + |- Lecture 2: Vectors and matrices.PDF + |- Lecture 3: Linear functions.PDF + ... +``` + +To do this, you can use the most powerful of arrows, the regex arrow. + +``` +"Lectures/Lecture (\\d+): (.*)\\.PDF" -re-> "Lectures/{i1:02}_{g2.lower().replace(' ', '_')}.pdf" +``` + +Note the escaped backslashes on the `SOURCE` side. diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 298c580..84332df 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -1,3 +1,9 @@ +# I'm sorry that this code has become a bit dense and unreadable. While +# reading, it is important to remember what True and False mean. I'd love to +# have some proper sum-types for the inputs and outputs, they'd make this code +# a lot easier to understand. 
+ +import ast import re from abc import ABC, abstractmethod from dataclasses import dataclass @@ -7,12 +13,23 @@ from typing import Dict, Optional, Union class Rule(ABC): @abstractmethod - def transform(self, path: PurePath) -> Optional[PurePath]: + def transform(self, path: PurePath) -> Union[PurePath, bool]: + """ + Try to apply this rule to the path. Returns another path if the rule + was successfully applied, True if the rule matched but resulted in an + exclamation mark, and False if the rule didn't match at all. + """ + pass +# These rules all use a Union[T, bool] for their right side. They are passed a +# T if the arrow's right side was a normal string, True if it was an +# exclamation mark and False if it was missing entirely. + class NormalRule(Rule): - def __init__(self, left: PurePath, right: PurePath): + def __init__(self, left: PurePath, right: Union[PurePath, bool]): + self._left = left self._right = right @@ -35,49 +52,61 @@ class NormalRule(Rule): return PurePath(*path_parts) - def transform(self, path: PurePath) -> Optional[PurePath]: + def transform(self, path: PurePath) -> Union[PurePath, bool]: if rest := self._match_prefix(path): - return self._right / rest + if isinstance(self._right, bool): + return self._right or path + else: + return self._right / rest - return None + return False class ExactRule(Rule): - def __init__(self, left: PurePath, right: PurePath): + def __init__(self, left: PurePath, right: Union[PurePath, bool]): self._left = left self._right = right - def transform(self, path: PurePath) -> Optional[PurePath]: + def transform(self, path: PurePath) -> Union[PurePath, bool]: if path == self._left: - return self._right + if isinstance(self._right, bool): + return self._right or path + else: + return self._right - return None + return False class ReRule(Rule): - def __init__(self, left: str, right: str): + def __init__(self, left: str, right: Union[str, bool]): self._left = left self._right = right - def transform(self, path: 
PurePath) -> Optional[PurePath]: + def transform(self, path: PurePath) -> Union[PurePath, bool]: if match := re.fullmatch(self._left, str(path)): - kwargs: Dict[str, Union[int, float]] = {} + if isinstance(self._right, bool): + return self._right or path + + vars: Dict[str, Union[str, int, float]] = {} groups = [match[0]] + list(match.groups()) for i, group in enumerate(groups): + vars[f"g{i}"] = group + try: - kwargs[f"i{i}"] = int(group) + vars[f"i{i}"] = int(group) except ValueError: pass try: - kwargs[f"f{i}"] = float(group) + vars[f"f{i}"] = float(group) except ValueError: pass - return PurePath(self._right.format(*groups, **kwargs)) + result = eval(f"f{self._right!r}", vars) + return PurePath(result) - return None + return False @dataclass @@ -136,7 +165,9 @@ QUOTATION_MARKS = {'"', "'"} def parse_string_literal(line: Line) -> str: escaped = False - result = [] + + # Points to first character of string literal + start_index = line.index quotation_mark = line.get() if quotation_mark not in QUOTATION_MARKS: @@ -147,17 +178,17 @@ def parse_string_literal(line: Line) -> str: while c := line.get(): if escaped: - result.append(c) escaped = False line.advance() elif c == quotation_mark: line.advance() - return "".join(result) + stop_index = line.index + literal = line.line[start_index:stop_index] + return ast.literal_eval(literal) elif c == "\\": escaped = True line.advance() else: - result.append(c) line.advance() raise RuleParseException(line, "Expected end of string literal") @@ -174,11 +205,14 @@ def parse_until_space_or_eol(line: Line) -> str: return "".join(result) -def parse_string(line: Line) -> str: +def parse_string(line: Line) -> Union[str, bool]: if line.get() in QUOTATION_MARKS: return parse_string_literal(line) else: - return parse_until_space_or_eol(line) + string = parse_until_space_or_eol(line) + if string == "!": + return True + return string def parse_arrow(line: Line) -> str: @@ -200,17 +234,35 @@ def parse_arrow(line: Line) -> str: def 
parse_rule(line: Line) -> Rule: + # Parse left side + leftindex = line.index left = parse_string(line) + if isinstance(left, bool): + line.index = leftindex + raise RuleParseException(line, "Left side can't be '!'") + + # Parse arrow line.expect(" ") arrowindex = line.index arrowname = parse_arrow(line) - line.expect(" ") - right = parse_string(line) + # Parse right side + if line.get(): + line.expect(" ") + right = parse_string(line) + else: + right = False + rightpath: Union[PurePath, bool] + if isinstance(right, bool): + rightpath = right + else: + rightpath = PurePath(right) + + # Dispatch if arrowname == "": - return NormalRule(PurePath(left), PurePath(right)) + return NormalRule(PurePath(left), rightpath) elif arrowname == "exact": - return ExactRule(PurePath(left), PurePath(right)) + return ExactRule(PurePath(left), rightpath) elif arrowname == "re": return ReRule(left, right) else: @@ -232,7 +284,12 @@ class Transformer: def transform(self, path: PurePath) -> Optional[PurePath]: for rule in self._rules: - if result := rule.transform(path): + result = rule.transform(path) + if isinstance(result, PurePath): return result + elif result: # Exclamation mark + return None + else: + continue return None From a8dcf941b94d8f474f2606a92d1caf57ccb61665 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 30 Apr 2021 15:32:56 +0200 Subject: [PATCH 082/524] Document possible redownload settings --- CONFIG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CONFIG.md b/CONFIG.md index 05f3363..a004dc3 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -41,6 +41,15 @@ crawlers: - `type`: The types are specified in [this section](#crawler-types). - `output_dir`: The directory the crawler synchronizes files to. A crawler will never place any files outside of this directory. (Default: crawler's name) +- `redownload`: When to download again a file that is already present locally. + (Default: `never-smart`) + - `never`: If a file is present locally, it is not downloaded again. 
+ - `never-smart`: Like `never`, but PFERD tries to detect if an already + downloaded files has changed via some (unreliable) heuristics. + - `always`: All files are always downloaded, regardless of whether they are + already present locally. + - `always-smart`: Like `always`, but PFERD tries to avoid unnecessary + downloads via some (unreliable) heuristics. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) From 91c33596daf267541a0f389de252c193d9c2c05e Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 30 Apr 2021 16:22:14 +0200 Subject: [PATCH 083/524] Load crawlers from config file --- PFERD/config.py | 21 +++++++++++++++++++-- PFERD/crawler.py | 8 +++++++- PFERD/pferd.py | 46 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/PFERD/config.py b/PFERD/config.py index d71e4d1..d02900d 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -1,7 +1,7 @@ import configparser import os from pathlib import Path -from typing import Optional +from typing import List, Optional, Tuple from .utils import prompt_yes_no @@ -26,7 +26,6 @@ class Config: def __init__(self, parser: configparser.ConfigParser): self._parser = parser - # TODO Load and validate config into dataclasses @staticmethod def _fail_load(path: Path, reason: str) -> None: @@ -99,3 +98,21 @@ class Config: self._fail_dump(path, "That's a directory, not a file") except PermissionError: self._fail_dump(path, "Insufficient permissions") + + @property + def default_section(self) -> configparser.SectionProxy: + return self._parser[configparser.DEFAULTSECT] + + def crawler_sections(self) -> List[Tuple[str, configparser.SectionProxy]]: + result = [] + for section_name, section_proxy in self._parser.items(): + if section_name.startswith("crawler:"): + crawler_name = section_name[8:] + result.append((crawler_name, section_proxy)) + + return result + + @property 
+ def working_dir(self) -> Path: + pathstr = self.default_section.get("working_dir", ".") + return Path(pathstr).expanduser() diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 9ceca20..6b1b350 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -8,6 +8,7 @@ from typing import AsyncContextManager, AsyncIterator, Optional from rich.markup import escape from .conductor import ProgressBar, TerminalConductor +from .config import Config from .limiter import Limiter from .transformer import RuleParseException, Transformer @@ -17,7 +18,12 @@ class CrawlerLoadException(Exception): class Crawler(ABC): - def __init__(self, name: str, section: configparser.SectionProxy) -> None: + def __init__( + self, + name: str, + config: Config, + section: configparser.SectionProxy, + ) -> None: """ Initialize a crawler from its name and its section in the config file. diff --git a/PFERD/pferd.py b/PFERD/pferd.py index d145ade..131ddc1 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -1,12 +1,52 @@ +from typing import Dict + +from rich import print +from rich.markup import escape + from .config import Config +from .crawler import Crawler from .crawlers import CRAWLERS +class PferdLoadException(Exception): + pass + + class Pferd: def __init__(self, config: Config): self._config = config + self._crawlers: Dict[str, Crawler] = {} + + def _load_crawlers(self) -> None: + abort = False + for name, section in self._config.crawler_sections(): + print(f"[bold bright_cyan]Loading[/] crawler:{escape(name)}") + crawler_type = section.get("type") + crawler_constructor = CRAWLERS.get(crawler_type) + if crawler_constructor is None: + abort = True + if crawler_type is None: + print("[red]Error: No type") + else: + t = escape(repr(crawler_type)) + print(f"[red]Error: Unknown type {t}") + continue + + crawler = crawler_constructor(name, self._config, section) + self._crawlers[name] = crawler + + if abort: + raise PferdLoadException() async def run(self) -> None: - print("Bleep bloop 1") - await 
CRAWLERS["dummy"]("dummy", self._config._parser["dummy"]).run() - print("Bleep bloop 2") + try: + self._load_crawlers() + except PferdLoadException: + print("[bold red]Could not initialize PFERD properly") + exit(1) + + for name, crawler in self._crawlers.items(): + print() + print(f"[bold bright_cyan]Running[/] crawler:{escape(name)}") + + await crawler.run() From 07e831218e975ff82637f5016d40ab0112882652 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 2 May 2021 00:56:10 +0200 Subject: [PATCH 084/524] Add sync report --- PFERD/report.py | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 PFERD/report.py diff --git a/PFERD/report.py b/PFERD/report.py new file mode 100644 index 0000000..382b82d --- /dev/null +++ b/PFERD/report.py @@ -0,0 +1,80 @@ +from dataclasses import dataclass +from pathlib import PurePath +from typing import Set + + +@dataclass +class MarkDuplicateException(Exception): + """ + Tried to mark a file that was already marked. + """ + + path: PurePath + + +@dataclass +class MarkConflictException(Exception): + """ + Marking the path would have caused a conflict. + + A conflict can have two reasons: Either the new file has the same path as + the parent directory of a known file, or a parent directory of the new file + has the same path as a known file. In either case, adding the new file + would require a file and a directory to share the same path, which is + usually not possible. + """ + + path: PurePath + collides_with: PurePath + + +class Report: + """ + A report of a synchronization. Includes all files found by the crawler, as + well as the set of changes made to local files. + """ + + def __init__(self): + self.known_files: Set[PurePath] = set() + + self.new_files: Set[PurePath] = set() + self.changed_files: Set[PurePath] = set() + self.deleted_files: Set[PurePath] = set() + + def mark(self, path: PurePath): + """ + Mark a previously unknown file as known. 
+ + May throw a MarkDuplicateException or a MarkConflictException. For more + detail, see the respective exception's docstring. + """ + + for known_path in self.known_files: + if path == known_path: + raise MarkDuplicateException(path) + + if path.relative_to(known_path) or known_path.relative_to(path): + raise MarkConflictException(path, known_path) + + self.known_files.add(path) + + def add_file(self, path: PurePath): + """ + Unlike mark(), this function accepts any paths. + """ + + self.new_files.add(path) + + def change_file(self, path: PurePath): + """ + Unlike mark(), this function accepts any paths. + """ + + self.changed_files.add(path) + + def delete_file(self, path: PurePath): + """ + Unlike mark(), this function accepts any paths. + """ + + self.deleted_files.add(path) From fde811ae5aee4a87c7d2891c23e12c2fa554676f Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 5 May 2021 00:55:55 +0200 Subject: [PATCH 085/524] Document on_conflict option --- CONFIG.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index a004dc3..65daae9 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -50,6 +50,18 @@ crawlers: already present locally. - `always-smart`: Like `always`, but PFERD tries to avoid unnecessary downloads via some (unreliable) heuristics. +- `on_conflict`: What to do when the local and remote versions of a file or + directory differ. Includes the cases where a file is replaced by a directory + or a directory by a file. (Default: `prompt`) + - `prompt`: Always ask the user before overwriting or deleting local files + and directories. + - `local-first`: Always keep the local file or directory. Equivalent to + using `prompt` and always choosing "no". Implies that `redownload` is set + to `never`. + - `remote-first`: Always keep the remote file or directory. Equivalent to + using `prompt` and always choosing "yes". 
+ - `no-delete`: Never delete local files, but overwrite local files if the + remote file is different. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) @@ -133,7 +145,7 @@ example `{g2.lower()}` or `{g3.replace(' ', '_')}`. ### Example: Tutorials -You have ILIAS course with lots of tutorials, but are only interested in a +You have an ILIAS course with lots of tutorials, but are only interested in a single one? ``` @@ -174,7 +186,7 @@ Lectures/ ... ``` -To do this, you can use the most powerful of arrows, the regex arrow. +To do this, you can use the most powerful of arrows: The regex arrow. ``` "Lectures/Lecture (\\d+): (.*)\\.PDF" -re-> "Lectures/{i1:02}_{g2.lower().replace(' ', '_')}.pdf" From bbfdadc4633997b5437ba6d4f98db41fb69e2390 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 5 May 2021 18:08:34 +0200 Subject: [PATCH 086/524] Implement output directory --- PFERD/crawler.py | 8 +- PFERD/output_dir.py | 365 ++++++++++++++++++++++++++++++++++++++++++++ PFERD/pferd.py | 7 +- PFERD/report.py | 13 +- 4 files changed, 381 insertions(+), 12 deletions(-) create mode 100644 PFERD/output_dir.py diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 6b1b350..4ee4fad 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -10,6 +10,7 @@ from rich.markup import escape from .conductor import ProgressBar, TerminalConductor from .config import Config from .limiter import Limiter +from .output_dir import OnConflict, OutputDirectory, Redownload from .transformer import RuleParseException, Transformer @@ -44,8 +45,11 @@ class Crawler(ABC): e.pretty_print() raise CrawlerLoadException() - # working_dir = Path(section.get("working_dir", "")) - # output_dir = working_dir / section.get("output_dir", name) + output_dir = config.working_dir / section.get("output_dir", name) + redownload = Redownload.NEVER_SMART + on_conflict = OnConflict.PROMPT + self._output_dir = 
OutputDirectory( + output_dir, redownload, on_conflict, self._conductor) def print(self, text: str) -> None: """ diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py new file mode 100644 index 0000000..9276069 --- /dev/null +++ b/PFERD/output_dir.py @@ -0,0 +1,365 @@ +import filecmp +import os +import random +import shutil +import string +from contextlib import asynccontextmanager +from dataclasses import dataclass +from datetime import datetime +from enum import Enum +from pathlib import Path, PurePath +# TODO In Python 3.9 and above, AsyncContextManager is deprecated +from typing import AsyncContextManager, AsyncIterator, BinaryIO, Optional + +from .conductor import TerminalConductor +from .report import MarkConflictException, MarkDuplicateException, Report +from .utils import prompt_yes_no + +SUFFIX_CHARS = string.ascii_lowercase + string.digits +SUFFIX_LENGTH = 6 +TRIES = 5 + + +class OutputDirException(Exception): + pass + + +class Redownload(Enum): + NEVER = "never" + NEVER_SMART = "never-smart" + ALWAYS = "always" + ALWAYS_SMART = "always-smart" + + +class OnConflict(Enum): + PROMPT = "prompt" + LOCAL_FIRST = "local-first" + REMOTE_FIRST = "remote-first" + NO_DELETE = "no-delete" + + +@dataclass +class Heuristics: + mtime: Optional[datetime] + + +class FileSink: + def __init__(self, file: BinaryIO): + self._file = file + self._done = False + + @property + def file(self) -> BinaryIO: + return self._file + + def done(self) -> None: + self._done = True + + def is_done(self) -> bool: + return self._done + + +@dataclass +class DownloadInfo: + path: PurePath + local_path: Path + tmp_path: Path + heuristics: Heuristics + on_conflict: OnConflict + success: bool = False + + +class OutputDirectory: + def __init__( + self, + root: Path, + redownload: Redownload, + on_conflict: OnConflict, + conductor: TerminalConductor, + ): + self._root = root + self._redownload = redownload + self._on_conflict = on_conflict + self._conductor = conductor + + self._report = Report() 
+ + def _mark(self, path: PurePath) -> None: + """ + May throw an OutputDirException + """ + + try: + self._report.mark(path) + except MarkDuplicateException: + msg = "Another file has already been placed here." + raise OutputDirException(msg) + except MarkConflictException as e: + msg = f"Collides with other file: {e.collides_with}" + raise OutputDirException(msg) + + def _resolve(self, path: PurePath) -> Path: + """ + May throw an OutputDirException. + """ + + if ".." in path.parts: + msg = f"Path {path} contains forbidden '..'" + raise OutputDirException(msg) + return self._root / path + + def _should_download( + self, + local_path: Path, + heuristics: Heuristics, + redownload: Redownload, + ) -> bool: + # If we don't have a *file* at the local path, we'll always redownload + # since we know that the remote is different from the local files. This + # includes the case where no local file exists. + if not local_path.is_file(): + return True + + if redownload == Redownload.NEVER: + return False + elif redownload == Redownload.ALWAYS: + return True + + stat = local_path.stat() + + remote_newer = None + if mtime := heuristics.mtime: + remote_newer = mtime.timestamp() > stat.st_mtime + + if redownload == Redownload.NEVER_SMART: + if remote_newer is None: + return False + else: + return remote_newer + elif redownload == Redownload.ALWAYS_SMART: + if remote_newer is None: + return True + else: + return not remote_newer + + # This should never be reached + raise ValueError(f"{redownload!r} is not a valid redownload policy") + + # The following conflict resolution functions all return False if the local + # file(s) should be kept and True if they should be replaced by the remote + # files. + + async def _conflict_lfrf( + self, + on_conflict: OnConflict, + path: PurePath, + ) -> bool: + if on_conflict == OnConflict.PROMPT: + async with self._conductor.exclusive_output(): + prompt = f"Replace {path} with remote file?" 
+ return await prompt_yes_no(prompt, default=False) + elif on_conflict == OnConflict.LOCAL_FIRST: + return False + elif on_conflict == OnConflict.REMOTE_FIRST: + return True + elif on_conflict == OnConflict.NO_DELETE: + return True + + # This should never be reached + raise ValueError(f"{on_conflict!r} is not a valid conflict policy") + + async def _conflict_ldrf( + self, + on_conflict: OnConflict, + path: PurePath, + ) -> bool: + if on_conflict == OnConflict.PROMPT: + async with self._conductor.exclusive_output(): + prompt = f"Recursively delete {path} and replace with remote file?" + return await prompt_yes_no(prompt, default=False) + elif on_conflict == OnConflict.LOCAL_FIRST: + return False + elif on_conflict == OnConflict.REMOTE_FIRST: + return True + elif on_conflict == OnConflict.NO_DELETE: + return False + + # This should never be reached + raise ValueError(f"{on_conflict!r} is not a valid conflict policy") + + async def _conflict_lfrd( + self, + on_conflict: OnConflict, + path: PurePath, + parent: PurePath, + ) -> bool: + if on_conflict == OnConflict.PROMPT: + async with self._conductor.exclusive_output(): + prompt = f"Delete {parent} so remote file {path} can be downloaded?" + return await prompt_yes_no(prompt, default=False) + elif on_conflict == OnConflict.LOCAL_FIRST: + return False + elif on_conflict == OnConflict.REMOTE_FIRST: + return True + elif on_conflict == OnConflict.NO_DELETE: + return False + + # This should never be reached + raise ValueError(f"{on_conflict!r} is not a valid conflict policy") + + async def _conflict_delete_lf( + self, + on_conflict: OnConflict, + path: PurePath, + ) -> bool: + if on_conflict == OnConflict.PROMPT: + async with self._conductor.exclusive_output(): + prompt = f"Delete {path}?" 
+ return await prompt_yes_no(prompt, default=False) + elif on_conflict == OnConflict.LOCAL_FIRST: + return False + elif on_conflict == OnConflict.REMOTE_FIRST: + return True + elif on_conflict == OnConflict.NO_DELETE: + return False + + # This should never be reached + raise ValueError(f"{on_conflict!r} is not a valid conflict policy") + + def _tmp_path(self, base: Path, suffix_length: int) -> Path: + prefix = "" if base.name.startswith(".") else "." + suffix = random.choices(SUFFIX_CHARS, k=suffix_length) + name = f"{prefix}{base.name}.tmp.{suffix}" + return base.parent / name + + @asynccontextmanager + async def _sink_context_manager( + self, + file: BinaryIO, + info: DownloadInfo, + ) -> AsyncIterator[FileSink]: + sink = FileSink(file) + try: + with file: + yield sink + finally: + info.success = sink.is_done() + await self._after_download(info) + + async def download( + self, + path: PurePath, + mtime: Optional[datetime] = None, + redownload: Optional[Redownload] = None, + on_conflict: Optional[OnConflict] = None, + ) -> Optional[AsyncContextManager[FileSink]]: + """ + May throw an OutputDirException. 
+ """ + + heuristics = Heuristics(mtime) + redownload = self._redownload if redownload is None else redownload + on_conflict = self._on_conflict if on_conflict is None else on_conflict + local_path = self._resolve(path) + + self._mark(path) + + if not self._should_download(local_path, heuristics, redownload): + return None + + # Detect and solve local-dir-remote-file conflict + if local_path.is_dir(): + if await self._conflict_ldrf(on_conflict, path): + shutil.rmtree(local_path) + else: + return None + + # Detect and solve local-file-remote-dir conflict + for parent in path.parents: + local_parent = self._resolve(parent) + if local_parent.exists() and not local_parent.is_dir(): + if await self._conflict_lfrd(on_conflict, path, parent): + local_parent.unlink() + break + else: + return None + + # Ensure parent directory exists + local_path.parent.mkdir(parents=True, exist_ok=True) + + # Create tmp file + for attempt in range(TRIES): + suffix_length = SUFFIX_LENGTH + 2 * attempt + tmp_path = self._tmp_path(local_path, suffix_length) + info = DownloadInfo(path, local_path, tmp_path, + heuristics, on_conflict) + try: + file = open(tmp_path, "bx") + return self._sink_context_manager(file, info) + except FileExistsError: + pass # Try again + + return None + + async def _after_download(self, info: DownloadInfo) -> None: + changed = False + + if not info.success: + info.tmp_path.unlink() + return + + # Solve conflicts arising from existing local file + if info.local_path.exists(): + changed = True + if filecmp.cmp(info.local_path, info.tmp_path): + info.tmp_path.unlink() + return + + if not await self._conflict_lfrf(info.on_conflict, info.path): + info.tmp_path.unlink() + return + + # Modify metadata if necessary + if mtime := info.heuristics.mtime: + # TODO Pick an implementation + # Rounding up to avoid inaccuracies in how the OS stores timestamps + # mtimestamp = math.ceil(mtime.timestamp()) + mtimestamp = mtime.timestamp() + os.utime(info.tmp_path, times=(mtimestamp, 
mtimestamp)) + + info.tmp_path.replace(info.local_path) + + if changed: + self._report.change_file(info.path) + else: + self._report.add_file(info.path) + + def cleanup(self) -> None: + self._cleanup_dir(self._root, PurePath()) + + def _cleanup(self, path: Path, pure: PurePath) -> None: + if path.is_dir(): + self._cleanup_dir(path, pure) + elif path.is_file(): + self._cleanup_file(path, pure) + + def _cleanup_dir(self, path: Path, pure: PurePath) -> None: + for child in path.iterdir(): + pure_child = pure / child.name + self._cleanup(child, pure_child) + + try: + path.rmdir() + except OSError: + pass + + def _cleanup_file(self, path: Path, pure: PurePath) -> None: + if self._report.marked(pure): + return + + if self._conflict_delete_lf(self._on_conflict, pure): + try: + path.unlink() + self._report.delete_file(pure) + except OSError: + pass diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 131ddc1..54356c1 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -25,11 +25,8 @@ class Pferd: crawler_constructor = CRAWLERS.get(crawler_type) if crawler_constructor is None: abort = True - if crawler_type is None: - print("[red]Error: No type") - else: - t = escape(repr(crawler_type)) - print(f"[red]Error: Unknown type {t}") + t = escape(repr(crawler_type)) + print(f"[red]Error: Unknown type {t}") continue crawler = crawler_constructor(name, self._config, section) diff --git a/PFERD/report.py b/PFERD/report.py index 382b82d..38e8130 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -34,14 +34,14 @@ class Report: well as the set of changes made to local files. """ - def __init__(self): + def __init__(self) -> None: self.known_files: Set[PurePath] = set() self.new_files: Set[PurePath] = set() self.changed_files: Set[PurePath] = set() self.deleted_files: Set[PurePath] = set() - def mark(self, path: PurePath): + def mark(self, path: PurePath) -> None: """ Mark a previously unknown file as known. 
@@ -58,21 +58,24 @@ class Report: self.known_files.add(path) - def add_file(self, path: PurePath): + def marked(self, path: PurePath) -> bool: + return path in self.known_files + + def add_file(self, path: PurePath) -> None: """ Unlike mark(), this function accepts any paths. """ self.new_files.add(path) - def change_file(self, path: PurePath): + def change_file(self, path: PurePath) -> None: """ Unlike mark(), this function accepts any paths. """ self.changed_files.add(path) - def delete_file(self, path: PurePath): + def delete_file(self, path: PurePath) -> None: """ Unlike mark(), this function accepts any paths. """ From 5497dd28275764d7af5dbac6c94452f1b71c8bab Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 5 May 2021 23:36:54 +0200 Subject: [PATCH 087/524] Add @noncritical and @repeat decorators --- PFERD/crawler.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 4ee4fad..4cb48a9 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -3,7 +3,8 @@ from abc import ABC, abstractmethod from contextlib import asynccontextmanager from pathlib import PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import AsyncContextManager, AsyncIterator, Optional +from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, + Callable, Optional, Protocol, TypeVar) from rich.markup import escape @@ -18,6 +19,78 @@ class CrawlerLoadException(Exception): pass +class CrawlerMemberFunction(Protocol): + def __call__( + self, + __self: "Crawler", + *__args: Any, + **__kwargs: Any, + ) -> None: + pass + + +Wrapped = TypeVar("Wrapped", bound=CrawlerMemberFunction) + + +def noncritical(f: Wrapped) -> Wrapped: + def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: + try: + f(self, *args, **kwargs) + except Exception as e: + self.print(f"[red]Something went wrong: {escape(str(e))}") + self._error_free = False + return 
wrapper # type: ignore + + +def repeat(attempts: int) -> Callable[[Wrapped], Wrapped]: + def decorator(f: Wrapped) -> Wrapped: + def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: + for _ in range(attempts - 1): + try: + f(self, *args, **kwargs) + return + except Exception: + pass + f(self, *args, **kwargs) + return wrapper # type: ignore + return decorator + + +class ACrawlerMemberFunction(Protocol): + def __call__( + self, + __self: "Crawler", + *__args: Any, + **__kwargs: Any, + ) -> Awaitable[None]: + pass + + +AWrapped = TypeVar("AWrapped", bound=ACrawlerMemberFunction) + + +def anoncritical(f: AWrapped) -> AWrapped: + async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: + try: + await f(self, *args, **kwargs) + except Exception as e: + self.print(f"[red]Something went wrong: {escape(str(e))}") + self._error_free = False + return wrapper # type: ignore + + +def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: + def decorator(f: AWrapped) -> AWrapped: + async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: + for _ in range(attempts - 1): + try: + await f(self, *args, **kwargs) + return + except Exception: + pass + await f(self, *args, **kwargs) + return wrapper # type: ignore + return decorator class Crawler(ABC): def __init__( self, @@ -51,6 +124,8 @@ class Crawler(ABC): self._output_dir = OutputDirectory( output_dir, redownload, on_conflict, self._conductor) + self._error_free = False + def print(self, text: str) -> None: """ Print rich markup to the terminal. 
Crawlers *must* use this function to From 273d56c39a8440aca743188ddb56e7c50a4f109d Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 5 May 2021 23:45:10 +0200 Subject: [PATCH 088/524] Properly load crawler config --- PFERD/config.py | 38 ++++++++++++++++++++++------ PFERD/crawler.py | 52 +++++++++++++++++++++++++++++++------- PFERD/crawlers/__init__.py | 9 +++++-- 3 files changed, 80 insertions(+), 19 deletions(-) diff --git a/PFERD/config.py b/PFERD/config.py index d02900d..f2abe8d 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -1,7 +1,8 @@ -import configparser import os +from configparser import ConfigParser, SectionProxy +from dataclasses import dataclass from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, List, NoReturn, Optional, Tuple from .utils import prompt_yes_no @@ -14,6 +15,27 @@ class ConfigDumpException(Exception): pass +@dataclass +class ConfigFormatException(Exception): + section: str + key: str + desc: str + + +class Section: + def __init__(self, section: SectionProxy): + self.s = section + + def error(self, key: str, desc: str) -> NoReturn: + raise ConfigFormatException(self.s.name, key, desc) + + def invalid_value(self, key: str, value: Any) -> NoReturn: + self.error(key, f"Invalid value: {value!r}") + + def missing_value(self, key: str) -> NoReturn: + self.error(key, "Missing value") + + class Config: @staticmethod def _default_path() -> Path: @@ -24,7 +46,7 @@ class Config: else: return Path("~/.pferd.cfg").expanduser() - def __init__(self, parser: configparser.ConfigParser): + def __init__(self, parser: ConfigParser): self._parser = parser @staticmethod @@ -34,7 +56,7 @@ class Config: raise ConfigLoadException() @staticmethod - def load_parser(path: Optional[Path] = None) -> configparser.ConfigParser: + def load_parser(path: Optional[Path] = None) -> ConfigParser: """ May throw a ConfigLoadException. 
""" @@ -42,7 +64,7 @@ class Config: if not path: path = Config._default_path() - parser = configparser.ConfigParser() + parser = ConfigParser() # Using config.read_file instead of config.read because config.read # would just ignore a missing file and carry on. @@ -100,10 +122,10 @@ class Config: self._fail_dump(path, "Insufficient permissions") @property - def default_section(self) -> configparser.SectionProxy: - return self._parser[configparser.DEFAULTSECT] + def default_section(self) -> SectionProxy: + return self._parser[self._parser.default_section] - def crawler_sections(self) -> List[Tuple[str, configparser.SectionProxy]]: + def crawler_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] for section_name, section_proxy in self._parser.items(): if section_name.startswith("crawler:"): diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 4cb48a9..ff779ab 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -1,7 +1,6 @@ -import configparser from abc import ABC, abstractmethod from contextlib import asynccontextmanager -from pathlib import PurePath +from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, Callable, Optional, Protocol, TypeVar) @@ -9,7 +8,7 @@ from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, from rich.markup import escape from .conductor import ProgressBar, TerminalConductor -from .config import Config +from .config import Config, Section from .limiter import Limiter from .output_dir import OnConflict, OutputDirectory, Redownload from .transformer import RuleParseException, Transformer @@ -91,12 +90,46 @@ def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: await f(self, *args, **kwargs) return wrapper # type: ignore return decorator + + +class CrawlerSection(Section): + def output_dir(self, name: str) -> Path: + return Path(self.s.get("output_dir", name)) + + def redownload(self) -> 
Redownload: + value = self.s.get("redownload", "never-smart") + if value == "never": + return Redownload.NEVER + elif value == "never-smart": + return Redownload.NEVER_SMART + elif value == "always": + return Redownload.ALWAYS + elif value == "always-smart": + return Redownload.ALWAYS_SMART + self.invalid_value("redownload", value) + + def on_conflict(self) -> OnConflict: + value = self.s.get("on_conflict", "prompt") + if value == "prompt": + return OnConflict.PROMPT + elif value == "local-first": + return OnConflict.LOCAL_FIRST + elif value == "remote-first": + return OnConflict.REMOTE_FIRST + elif value == "no-delete": + return OnConflict.NO_DELETE + self.invalid_value("on_conflict", value) + + def transform(self) -> str: + return self.s.get("transform", "") + + class Crawler(ABC): def __init__( self, name: str, config: Config, - section: configparser.SectionProxy, + section: CrawlerSection, ) -> None: """ Initialize a crawler from its name and its section in the config file. @@ -113,16 +146,17 @@ class Crawler(ABC): self._limiter = Limiter() try: - self._transformer = Transformer(section.get("transform", "")) + self._transformer = Transformer(section.transform()) except RuleParseException as e: e.pretty_print() raise CrawlerLoadException() - output_dir = config.working_dir / section.get("output_dir", name) - redownload = Redownload.NEVER_SMART - on_conflict = OnConflict.PROMPT self._output_dir = OutputDirectory( - output_dir, redownload, on_conflict, self._conductor) + config.working_dir / section.output_dir(name), + section.redownload(), + section.on_conflict(), + self._conductor, + ) self._error_free = False diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index 5248a2d..69dac39 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -1,5 +1,10 @@ +from configparser import SectionProxy +from typing import Callable, Dict + +from ..config import Config +from ..crawler import Crawler, CrawlerSection from .dummy import 
DummyCrawler -CRAWLERS = { - "dummy": DummyCrawler, +CRAWLERS: Dict[str, Callable[[str, Config, SectionProxy], Crawler]] = { + "dummy": lambda n, c, s: DummyCrawler(n, c, CrawlerSection(s)), } From 60cd9873bcb9f116827eff6b7bc1c444fb0b786d Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 6 May 2021 01:02:40 +0200 Subject: [PATCH 089/524] Add local file crawler --- PFERD/conductor.py | 3 ++ PFERD/crawler.py | 34 ++++++++++++++------ PFERD/crawlers/__init__.py | 2 ++ PFERD/crawlers/local.py | 63 ++++++++++++++++++++++++++++++++++++++ PFERD/output_dir.py | 2 +- PFERD/pferd.py | 2 +- PFERD/report.py | 11 ++++++- 7 files changed, 104 insertions(+), 13 deletions(-) create mode 100644 PFERD/crawlers/local.py diff --git a/PFERD/conductor.py b/PFERD/conductor.py index 161a287..76d0e2a 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -14,6 +14,9 @@ class ProgressBar: def advance(self, amount: float = 1) -> None: self._progress.advance(self._taskid, advance=amount) + def set_total(self, total) -> None: + self._progress.update(self._taskid, total=total) + class TerminalConductor: def __init__(self) -> None: diff --git a/PFERD/crawler.py b/PFERD/crawler.py index ff779ab..d088b21 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -1,16 +1,17 @@ from abc import ABC, abstractmethod from contextlib import asynccontextmanager +from datetime import datetime from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, - Callable, Optional, Protocol, TypeVar) +from typing import (Any, AsyncContextManager, AsyncIterator, Callable, + Coroutine, Optional, Protocol, TypeVar) from rich.markup import escape from .conductor import ProgressBar, TerminalConductor from .config import Config, Section from .limiter import Limiter -from .output_dir import OnConflict, OutputDirectory, Redownload +from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload from 
.transformer import RuleParseException, Transformer @@ -37,7 +38,7 @@ def noncritical(f: Wrapped) -> Wrapped: f(self, *args, **kwargs) except Exception as e: self.print(f"[red]Something went wrong: {escape(str(e))}") - self._error_free = False + self.error_free = False return wrapper # type: ignore @@ -61,7 +62,7 @@ class ACrawlerMemberFunction(Protocol): __self: "Crawler", *__args: Any, **__kwargs: Any, - ) -> Awaitable[None]: + ) -> Coroutine[Any, Any, None]: pass @@ -74,7 +75,7 @@ def anoncritical(f: AWrapped) -> AWrapped: await f(self, *args, **kwargs) except Exception as e: self.print(f"[red]Something went wrong: {escape(str(e))}") - self._error_free = False + self.error_free = False return wrapper # type: ignore @@ -94,7 +95,7 @@ def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: class CrawlerSection(Section): def output_dir(self, name: str) -> Path: - return Path(self.s.get("output_dir", name)) + return Path(self.s.get("output_dir", name)).expanduser() def redownload(self) -> Redownload: value = self.s.get("redownload", "never-smart") @@ -158,7 +159,7 @@ class Crawler(ABC): self._conductor, ) - self._error_free = False + self.error_free = False def print(self, text: str) -> None: """ @@ -203,11 +204,24 @@ class Crawler(ABC): def download_bar( self, path: PurePath, - size: int, + total: Optional[int] = None, ) -> AsyncContextManager[ProgressBar]: pathstr = escape(str(path)) desc = f"[bold green]Downloading[/bold green] {pathstr}" - return self.progress_bar(desc, total=size) + return self.progress_bar(desc, total=total) + + async def download( + self, + path: PurePath, + mtime: Optional[datetime] = None, + redownload: Optional[Redownload] = None, + on_conflict: Optional[OnConflict] = None, + ) -> Optional[AsyncContextManager[FileSink]]: + return await self._output_dir.download( + path, mtime, redownload, on_conflict) + + async def cleanup(self) -> None: + await self._output_dir.cleanup() async def run(self) -> None: """ diff --git 
a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index 69dac39..15ef403 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -4,7 +4,9 @@ from typing import Callable, Dict from ..config import Config from ..crawler import Crawler, CrawlerSection from .dummy import DummyCrawler +from .local import LocalCrawler, LocalCrawlerSection CRAWLERS: Dict[str, Callable[[str, Config, SectionProxy], Crawler]] = { "dummy": lambda n, c, s: DummyCrawler(n, c, CrawlerSection(s)), + "local": lambda n, c, s: LocalCrawler(n, c, LocalCrawlerSection(s)), } diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py new file mode 100644 index 0000000..77ebf81 --- /dev/null +++ b/PFERD/crawlers/local.py @@ -0,0 +1,63 @@ +import asyncio +from pathlib import Path, PurePath + +from ..config import Config +from ..crawler import Crawler, CrawlerSection, anoncritical + + +class LocalCrawlerSection(CrawlerSection): + def path(self) -> Path: + value = self.s.get("path") + if value is None: + self.missing_value("path") + return Path(value).expanduser() + + +class LocalCrawler(Crawler): + def __init__( + self, + name: str, + config: Config, + section: LocalCrawlerSection, + ): + super().__init__(name, config, section) + + self._path = section.path() + + async def crawl(self) -> None: + await self._crawl_path(self._path, PurePath()) + if self.error_free: + self.cleanup() + + @anoncritical + async def _crawl_path(self, path: Path, pure: PurePath) -> None: + if path.is_dir(): + await self._crawl_dir(path, pure) + elif path.is_file(): + await self._crawl_file(path, pure) + + async def _crawl_dir(self, path: Path, pure: PurePath) -> None: + tasks = [] + async with self.crawl_bar(pure): + for child in path.iterdir(): + pure_child = pure / child.name + tasks.append(self._crawl_path(child, pure_child)) + await asyncio.gather(*tasks) + + async def _crawl_file(self, path: Path, pure: PurePath) -> None: + async with self.download_bar(path) as bar: + 
bar.set_total(path.stat().st_size) + + dl = await self.download(pure) + if not dl: + return + + async with dl as sink: + with open(path, "rb") as f: + while True: + data = f.read(1024**2) + if len(data) == 0: + break + sink.file.write(data) + bar.advance(len(data)) + sink.done() diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 9276069..c875574 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -294,7 +294,7 @@ class OutputDirectory: info = DownloadInfo(path, local_path, tmp_path, heuristics, on_conflict) try: - file = open(tmp_path, "bx") + file = open(tmp_path, "xb") return self._sink_context_manager(file, info) except FileExistsError: pass # Try again diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 54356c1..7cdbfa0 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -26,7 +26,7 @@ class Pferd: if crawler_constructor is None: abort = True t = escape(repr(crawler_type)) - print(f"[red]Error: Unknown type {t}") + print(f"[red]Error: Unknown crawler type {t}") continue crawler = crawler_constructor(name, self._config, section) diff --git a/PFERD/report.py b/PFERD/report.py index 38e8130..b98c90c 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -28,6 +28,15 @@ class MarkConflictException(Exception): collides_with: PurePath +# TODO Use PurePath.is_relative_to when updating to 3.9 +def is_relative_to(a: PurePath, b: PurePath) -> bool: + try: + a.relative_to(b) + return True + except ValueError: + return False + + class Report: """ A report of a synchronization. 
Includes all files found by the crawler, as @@ -53,7 +62,7 @@ class Report: if path == known_path: raise MarkDuplicateException(path) - if path.relative_to(known_path) or known_path.relative_to(path): + if is_relative_to(path, known_path) or is_relative_to(known_path, path): raise MarkConflictException(path, known_path) self.known_files.add(path) From f9b2fd60e2d43d300097704a4933721cbc2c2115 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 9 May 2021 01:33:47 +0200 Subject: [PATCH 090/524] Document local crawler and auth --- CONFIG.md | 24 +++++++++++++++++++++++- PFERD/crawlers/local.py | 2 +- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 65daae9..16c8531 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -65,6 +65,23 @@ crawlers: - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) +Some crawlers may also require credentials for authentication. To configure how +the crawler obtains its credentials, the `auth` option is used. It is set to the +full name of an auth section (including the `auth:` prefix). + +Here is a simple example: + +``` +[auth:example] +type = simple +username = foo +password = bar + +[crawl:something] +type = some-complex-crawler +auth = auth:example +``` + ## The `auth:*` sections Sections whose names start with `auth:` are used to configure authenticators. An @@ -82,7 +99,12 @@ authenticators is `type`: ## Crawler types -TODO Fill in as crawlers are implemented +### The `local` crawler + +This crawler crawls a local directory. It is really simple and mostly useful for +testing different setups. + +- `path`: Path to the local directory to crawl. 
(No default, must be specified) ## Authenticator types diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 77ebf81..40cc233 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -22,7 +22,7 @@ class LocalCrawler(Crawler): ): super().__init__(name, config, section) - self._path = section.path() + self._path = config.working_dir / section.path() async def crawl(self) -> None: await self._crawl_path(self._path, PurePath()) From cec0a8e1fc2611583c2ee11260686c9a67587561 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 9 May 2021 01:45:01 +0200 Subject: [PATCH 091/524] Fix mymy errors --- PFERD/crawler.py | 62 +++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index d088b21..b8e9d7c 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -3,8 +3,8 @@ from contextlib import asynccontextmanager from datetime import datetime from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import (Any, AsyncContextManager, AsyncIterator, Callable, - Coroutine, Optional, Protocol, TypeVar) +from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, + Callable, Optional, Protocol, TypeVar) from rich.markup import escape @@ -19,20 +19,17 @@ class CrawlerLoadException(Exception): pass -class CrawlerMemberFunction(Protocol): - def __call__( - self, - __self: "Crawler", - *__args: Any, - **__kwargs: Any, - ) -> None: - pass - - -Wrapped = TypeVar("Wrapped", bound=CrawlerMemberFunction) +Wrapped = TypeVar("Wrapped", bound=Callable[..., None]) def noncritical(f: Wrapped) -> Wrapped: + """ + Warning: Must only be applied to member functions of the Crawler class! + + Catches all exceptions occuring during the function call. If an exception + occurs, the crawler's error_free variable is set to False. 
+ """ + def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: try: f(self, *args, **kwargs) @@ -43,6 +40,14 @@ def noncritical(f: Wrapped) -> Wrapped: def repeat(attempts: int) -> Callable[[Wrapped], Wrapped]: + """ + Warning: Must only be applied to member functions of the Crawler class! + + If an exception occurs during the function call, retries the function call + a set amount of times. Exceptions that occur during the last attempt are + not caught and instead passed on upwards. + """ + def decorator(f: Wrapped) -> Wrapped: def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: for _ in range(attempts - 1): @@ -56,20 +61,18 @@ def repeat(attempts: int) -> Callable[[Wrapped], Wrapped]: return decorator -class ACrawlerMemberFunction(Protocol): - def __call__( - self, - __self: "Crawler", - *__args: Any, - **__kwargs: Any, - ) -> Coroutine[Any, Any, None]: - pass - - -AWrapped = TypeVar("AWrapped", bound=ACrawlerMemberFunction) +AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) def anoncritical(f: AWrapped) -> AWrapped: + """ + An async version of @noncritical. + Warning: Must only be applied to member functions of the Crawler class! + + Catches all exceptions occuring during the function call. If an exception + occurs, the crawler's error_free variable is set to False. + """ + async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: try: await f(self, *args, **kwargs) @@ -80,6 +83,15 @@ def anoncritical(f: AWrapped) -> AWrapped: def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: + """ + An async version of @noncritical. + Warning: Must only be applied to member functions of the Crawler class! + + If an exception occurs during the function call, retries the function call + a set amount of times. Exceptions that occur during the last attempt are + not caught and instead passed on upwards. 
+ """ + def decorator(f: AWrapped) -> AWrapped: async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: for _ in range(attempts - 1): @@ -221,7 +233,7 @@ class Crawler(ABC): path, mtime, redownload, on_conflict) async def cleanup(self) -> None: - await self._output_dir.cleanup() + self._output_dir.cleanup() async def run(self) -> None: """ From 595ba8b7ab601c90b930f36c4c63a194deac8fb8 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 10 May 2021 23:47:46 +0200 Subject: [PATCH 092/524] Remove dummy crawler --- PFERD/crawlers/__init__.py | 2 -- PFERD/crawlers/dummy.py | 59 -------------------------------------- 2 files changed, 61 deletions(-) delete mode 100644 PFERD/crawlers/dummy.py diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index 15ef403..bf88a2a 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -3,10 +3,8 @@ from typing import Callable, Dict from ..config import Config from ..crawler import Crawler, CrawlerSection -from .dummy import DummyCrawler from .local import LocalCrawler, LocalCrawlerSection CRAWLERS: Dict[str, Callable[[str, Config, SectionProxy], Crawler]] = { - "dummy": lambda n, c, s: DummyCrawler(n, c, CrawlerSection(s)), "local": lambda n, c, s: LocalCrawler(n, c, LocalCrawlerSection(s)), } diff --git a/PFERD/crawlers/dummy.py b/PFERD/crawlers/dummy.py deleted file mode 100644 index 204b4b1..0000000 --- a/PFERD/crawlers/dummy.py +++ /dev/null @@ -1,59 +0,0 @@ -import asyncio -import random -from pathlib import PurePath -from typing import Any - -from rich.markup import escape - -from ..crawler import Crawler -from ..utils import ainput - -DUMMY_TREE = { - "Blätter": { - "Blatt_01.pdf": (), - "Blatt_02.pdf": (), - "Blatt_03.pdf": (), - "Blatt_04.pdf": (), - "Blatt_05.pdf": (), - "Lösungen": { - "Blatt_01_Lösung.pdf": (), - "Blatt_02_Lösung.pdf": (), - "Blatt_03_Lösung.pdf": True, - "Blatt_04_Lösung.pdf": (), - "Blatt_05_Lösung.pdf": (), - }, - }, - "Vorlesungsfolien": { - "VL_01.pdf": 
(), - "VL_02.pdf": (), - "VL_03.pdf": (), - "VL_04.pdf": (), - "VL_05.pdf": (), - }, - "noch_mehr.txt": (), - "dateien.jar": (), -} - - -class DummyCrawler(Crawler): - async def crawl(self) -> None: - await self._crawl_entry(PurePath(), DUMMY_TREE) - - async def _crawl_entry(self, path: PurePath, value: Any) -> None: - if value is True: - async with self.exclusive_output(): - await ainput(f"File {path}, please press enter: ") - if value == () or value is True: - n = random.randint(5, 20) - async with self.download_bar(path, n) as bar: - await asyncio.sleep(random.random() / 2) - for i in range(n): - await asyncio.sleep(0.5) - bar.advance() - self.print(f"[green]Downloaded {escape(str(path))}") - else: - t = random.random() * 2 + 1 - async with self.crawl_bar(path) as bar: - await asyncio.sleep(t) - tasks = [self._crawl_entry(path / k, v) for k, v in value.items()] - await asyncio.gather(*tasks) From d5f29f01c59c5992e715eda49254b10c964771fc Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 10 May 2021 23:50:16 +0200 Subject: [PATCH 093/524] Use global conductor instance The switch from crawler-local conductors to a single pferd-global conductor was made to prepare for auth section credential providers. 
--- PFERD/conductor.py | 2 +- PFERD/config.py | 4 ++++ PFERD/crawler.py | 11 +++++------ PFERD/crawlers/__init__.py | 15 ++++++++++++--- PFERD/crawlers/local.py | 6 ++++-- PFERD/pferd.py | 9 ++++++++- 6 files changed, 34 insertions(+), 13 deletions(-) diff --git a/PFERD/conductor.py b/PFERD/conductor.py index 76d0e2a..4648e77 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -14,7 +14,7 @@ class ProgressBar: def advance(self, amount: float = 1) -> None: self._progress.advance(self._taskid, advance=amount) - def set_total(self, total) -> None: + def set_total(self, total: float) -> None: self._progress.update(self._taskid, total=total) diff --git a/PFERD/config.py b/PFERD/config.py index f2abe8d..f63922b 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -23,6 +23,10 @@ class ConfigFormatException(Exception): class Section: + """ + Base class for the crawler and auth section classes. + """ + def __init__(self, section: SectionProxy): self.s = section diff --git a/PFERD/crawler.py b/PFERD/crawler.py index b8e9d7c..4bcfe65 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -4,7 +4,7 @@ from datetime import datetime from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, - Callable, Optional, Protocol, TypeVar) + Callable, Optional, TypeVar) from rich.markup import escape @@ -141,8 +141,9 @@ class Crawler(ABC): def __init__( self, name: str, - config: Config, section: CrawlerSection, + config: Config, + conductor: TerminalConductor, ) -> None: """ Initialize a crawler from its name and its section in the config file. 
@@ -154,9 +155,9 @@ class Crawler(ABC): """ self.name = name - - self._conductor = TerminalConductor() + self._conductor = conductor self._limiter = Limiter() + self.error_free = True try: self._transformer = Transformer(section.transform()) @@ -171,8 +172,6 @@ class Crawler(ABC): self._conductor, ) - self.error_free = False - def print(self, text: str) -> None: """ Print rich markup to the terminal. Crawlers *must* use this function to diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index bf88a2a..aa049b9 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -1,10 +1,19 @@ from configparser import SectionProxy from typing import Callable, Dict +from ..conductor import TerminalConductor from ..config import Config -from ..crawler import Crawler, CrawlerSection +from ..crawler import Crawler from .local import LocalCrawler, LocalCrawlerSection -CRAWLERS: Dict[str, Callable[[str, Config, SectionProxy], Crawler]] = { - "local": lambda n, c, s: LocalCrawler(n, c, LocalCrawlerSection(s)), +CrawlerConstructor = Callable[[ + str, # Name (without the "crawl:" prefix) + SectionProxy, # Crawler's section of global config + Config, # Global config + TerminalConductor, # Global conductor instance +], Crawler] + +CRAWLERS: Dict[str, CrawlerConstructor] = { + "local": lambda n, s, c, t: + LocalCrawler(n, LocalCrawlerSection(s), c, t), } diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 40cc233..8501877 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -1,6 +1,7 @@ import asyncio from pathlib import Path, PurePath +from ..conductor import TerminalConductor from ..config import Config from ..crawler import Crawler, CrawlerSection, anoncritical @@ -17,10 +18,11 @@ class LocalCrawler(Crawler): def __init__( self, name: str, - config: Config, section: LocalCrawlerSection, + config: Config, + conductor: TerminalConductor, ): - super().__init__(name, config, section) + super().__init__(name, section, 
config, conductor) self._path = config.working_dir / section.path() diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 7cdbfa0..c7cd695 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -3,6 +3,7 @@ from typing import Dict from rich import print from rich.markup import escape +from .conductor import TerminalConductor from .config import Config from .crawler import Crawler from .crawlers import CRAWLERS @@ -15,6 +16,7 @@ class PferdLoadException(Exception): class Pferd: def __init__(self, config: Config): self._config = config + self._conductor = TerminalConductor() self._crawlers: Dict[str, Crawler] = {} def _load_crawlers(self) -> None: @@ -29,7 +31,12 @@ class Pferd: print(f"[red]Error: Unknown crawler type {t}") continue - crawler = crawler_constructor(name, self._config, section) + crawler = crawler_constructor( + name, + section, + self._config, + self._conductor, + ) self._crawlers[name] = crawler if abort: From 0459ed093eac4927bbc570fadbcdf949726713de Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 11 May 2021 00:27:43 +0200 Subject: [PATCH 094/524] Add simple authenticator ... 
including some required authenticator infrastructure --- PFERD/authenticator.py | 52 ++++++++++++++++++++++++++++++++ PFERD/authenticators/__init__.py | 19 ++++++++++++ PFERD/authenticators/simple.py | 48 +++++++++++++++++++++++++++++ PFERD/config.py | 9 ++++++ PFERD/pferd.py | 27 +++++++++++++++++ 5 files changed, 155 insertions(+) create mode 100644 PFERD/authenticator.py create mode 100644 PFERD/authenticators/__init__.py create mode 100644 PFERD/authenticators/simple.py diff --git a/PFERD/authenticator.py b/PFERD/authenticator.py new file mode 100644 index 0000000..42d8bb9 --- /dev/null +++ b/PFERD/authenticator.py @@ -0,0 +1,52 @@ +from abc import ABC, abstractmethod +from typing import Tuple + +from .conductor import TerminalConductor +from .config import Config, Section + + +class AuthLoadException(Exception): + pass + + +class AuthException(Exception): + pass + + +class AuthSection(Section): + pass + + +class Authenticator(ABC): + def __init__( + self, + name: str, + section: AuthSection, + config: Config, + conductor: TerminalConductor, + ) -> None: + """ + Initialize an authenticator from its name and its section in the config + file. + + If you are writing your own constructor for your own authenticator, + make sure to call this constructor first (via super().__init__). + + May throw an AuthLoadException. 
+ """ + + self.name = name + self.conductor = conductor + + @abstractmethod + async def credentials(self) -> Tuple[str, str]: + pass + + def invalid_credentials(self) -> None: + raise AuthException("Invalid credentials") + + def invalid_username(self) -> None: + raise AuthException("Invalid username") + + def invalid_password(self) -> None: + raise AuthException("Invalid password") diff --git a/PFERD/authenticators/__init__.py b/PFERD/authenticators/__init__.py new file mode 100644 index 0000000..d021d40 --- /dev/null +++ b/PFERD/authenticators/__init__.py @@ -0,0 +1,19 @@ +from configparser import SectionProxy +from typing import Callable, Dict + +from ..authenticator import Authenticator +from ..conductor import TerminalConductor +from ..config import Config +from .simple import SimpleAuthenticator, SimpleAuthSection + +AuthConstructor = Callable[[ + str, # Name (without the "auth:" prefix) + SectionProxy, # Authenticator's section of global config + Config, # Global config + TerminalConductor, # Global conductor instance +], Authenticator] + +AUTHENTICATORS: Dict[str, AuthConstructor] = { + "simple": lambda n, s, c, t: + SimpleAuthenticator(n, SimpleAuthSection(s), c, t), +} diff --git a/PFERD/authenticators/simple.py b/PFERD/authenticators/simple.py new file mode 100644 index 0000000..3a57faf --- /dev/null +++ b/PFERD/authenticators/simple.py @@ -0,0 +1,48 @@ +from typing import Optional, Tuple + +from ..authenticator import Authenticator, AuthSection +from ..conductor import TerminalConductor +from ..config import Config +from ..utils import agetpass, ainput + + +class SimpleAuthSection(AuthSection): + def username(self) -> Optional[str]: + return self.s.get("username") + + def password(self) -> Optional[str]: + return self.s.get("password") + + +class SimpleAuthenticator(Authenticator): + def __init__( + self, + name: str, + section: SimpleAuthSection, + config: Config, + conductor: TerminalConductor, + ) -> None: + super().__init__(name, section, config, 
conductor) + + self.username = section.username() + self.password = section.password() + + self.username_fixed = self.username is not None + self.password_fixed = self.password is not None + + async def credentials(self) -> Tuple[str, str]: + if self.username is not None and self.password is not None: + return self.username, self.password + + async with self.conductor.exclusive_output(): + if self.username is None: + self.username = await ainput("Username: ") + else: + print(f"Username: {self.username}") + + if self.password is None: + self.password = await agetpass("Password: ") + else: + print("Password: *******") + + return self.username, self.password diff --git a/PFERD/config.py b/PFERD/config.py index f63922b..56ea9af 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -138,6 +138,15 @@ class Config: return result + def authenticator_sections(self) -> List[Tuple[str, SectionProxy]]: + result = [] + for section_name, section_proxy in self._parser.items(): + if section_name.startswith("auth:"): + crawler_name = section_name[5:] + result.append((crawler_name, section_proxy)) + + return result + @property def working_dir(self) -> Path: pathstr = self.default_section.get("working_dir", ".") diff --git a/PFERD/pferd.py b/PFERD/pferd.py index c7cd695..fb411fb 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -3,6 +3,8 @@ from typing import Dict from rich import print from rich.markup import escape +from .authenticator import Authenticator +from .authenticators import AUTHENTICATORS from .conductor import TerminalConductor from .config import Config from .crawler import Crawler @@ -17,8 +19,32 @@ class Pferd: def __init__(self, config: Config): self._config = config self._conductor = TerminalConductor() + self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} + def _load_authenticators(self) -> None: + abort = False + for name, section in self._config.authenticator_sections(): + print(f"[bold bright_cyan]Loading[/] 
auth:{escape(name)}") + authenticator_type = section.get("type") + authenticator_constructor = AUTHENTICATORS.get(authenticator_type) + if authenticator_constructor is None: + abort = True + t = escape(repr(authenticator_type)) + print(f"[red]Error: Unknown authenticator type {t}") + continue + + authenticator = authenticator_constructor( + name, + section, + self._config, + self._conductor, + ) + self._authenticators[name] = authenticator + + if abort: + raise PferdLoadException() + def _load_crawlers(self) -> None: abort = False for name, section in self._config.crawler_sections(): @@ -44,6 +70,7 @@ class Pferd: async def run(self) -> None: try: + self._load_authenticators() self._load_crawlers() except PferdLoadException: print("[bold red]Could not initialize PFERD properly") From c3ce6bb31ca4aa17b94a50c044628d99bd01270c Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 11 May 2021 00:28:45 +0200 Subject: [PATCH 095/524] Fix crawler cleanup not being awaited --- PFERD/crawlers/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 8501877..e80472e 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -29,7 +29,7 @@ class LocalCrawler(Crawler): async def crawl(self) -> None: await self._crawl_path(self._path, PurePath()) if self.error_free: - self.cleanup() + await self.cleanup() @anoncritical async def _crawl_path(self, path: Path, pure: PurePath) -> None: From 0acdee15a0987bef6f8de8105404bedf414bee72 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 18:57:20 +0200 Subject: [PATCH 096/524] Let crawlers obtain authenticators --- PFERD/crawler.py | 12 +++++++++++- PFERD/crawlers/__init__.py | 12 +++++++----- PFERD/pferd.py | 1 + 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 4bcfe65..5148d9d 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -4,10 +4,11 @@ from datetime import datetime from pathlib 
import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, - Callable, Optional, TypeVar) + Callable, Dict, Optional, TypeVar) from rich.markup import escape +from .authenticator import Authenticator from .conductor import ProgressBar, TerminalConductor from .config import Config, Section from .limiter import Limiter @@ -136,6 +137,15 @@ class CrawlerSection(Section): def transform(self) -> str: return self.s.get("transform", "") + def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator: + value = self.s.get("auth") + if value is None: + self.missing_value("auth") + auth = authenticators.get(f"auth:{value}") + if auth is None: + self.invalid_value("auth", value) + return auth + class Crawler(ABC): def __init__( diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index aa049b9..b2e5af5 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -1,19 +1,21 @@ from configparser import SectionProxy from typing import Callable, Dict +from ..authenticator import Authenticator from ..conductor import TerminalConductor from ..config import Config from ..crawler import Crawler from .local import LocalCrawler, LocalCrawlerSection CrawlerConstructor = Callable[[ - str, # Name (without the "crawl:" prefix) - SectionProxy, # Crawler's section of global config - Config, # Global config - TerminalConductor, # Global conductor instance + str, # Name (without the "crawl:" prefix) + SectionProxy, # Crawler's section of global config + Config, # Global config + TerminalConductor, # Global conductor instance + Dict[str, Authenticator], # Loaded authenticators by name ], Crawler] CRAWLERS: Dict[str, CrawlerConstructor] = { - "local": lambda n, s, c, t: + "local": lambda n, s, c, t, a: LocalCrawler(n, LocalCrawlerSection(s), c, t), } diff --git a/PFERD/pferd.py b/PFERD/pferd.py index fb411fb..4500ba9 100644 --- a/PFERD/pferd.py +++ 
b/PFERD/pferd.py @@ -62,6 +62,7 @@ class Pferd: section, self._config, self._conductor, + self._authenticators, ) self._crawlers[name] = crawler From 6bd6adb9771514cdeb17786762854db77a03463b Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 19:36:46 +0200 Subject: [PATCH 097/524] Fix tmp file names --- PFERD/output_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index c875574..08c01a3 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -228,7 +228,7 @@ class OutputDirectory: def _tmp_path(self, base: Path, suffix_length: int) -> Path: prefix = "" if base.name.startswith(".") else "." - suffix = random.choices(SUFFIX_CHARS, k=suffix_length) + suffix = "".join(random.choices(SUFFIX_CHARS, k=suffix_length)) name = f"{prefix}{base.name}.tmp.{suffix}" return base.parent / name From 910462bb721cd66997361da4a153d2e2a8d59d48 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 19:37:27 +0200 Subject: [PATCH 098/524] Log stuff happening to files --- PFERD/output_dir.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 08c01a3..18e0b6a 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -11,6 +11,8 @@ from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import AsyncContextManager, AsyncIterator, BinaryIO, Optional +from rich.markup import escape + from .conductor import TerminalConductor from .report import MarkConflictException, MarkDuplicateException, Report from .utils import prompt_yes_no @@ -330,8 +332,12 @@ class OutputDirectory: info.tmp_path.replace(info.local_path) if changed: + self._conductor.print( + f"[bold bright_yellow]Changed[/] {escape(str(info.path))}") self._report.change_file(info.path) else: + self._conductor.print( + f"[bold bright_green]Added[/] {escape(str(info.path))}") self._report.add_file(info.path) def cleanup(self) -> None: @@ 
-360,6 +366,8 @@ class OutputDirectory: if self._conflict_delete_lf(self._on_conflict, pure): try: path.unlink() + self._conductor.print( + f"[bold bright_magenta]Deleted[/] {escape(str(path))}") self._report.delete_file(pure) except OSError: pass From 68781a88ab607060e909d8985b436c4de0ce4779 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 19:39:49 +0200 Subject: [PATCH 099/524] Fix asynchronous methods being not awaited --- PFERD/crawler.py | 2 +- PFERD/output_dir.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 5148d9d..da35801 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -242,7 +242,7 @@ class Crawler(ABC): path, mtime, redownload, on_conflict) async def cleanup(self) -> None: - self._output_dir.cleanup() + await self._output_dir.cleanup() async def run(self) -> None: """ diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 18e0b6a..635ee43 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -340,30 +340,30 @@ class OutputDirectory: f"[bold bright_green]Added[/] {escape(str(info.path))}") self._report.add_file(info.path) - def cleanup(self) -> None: - self._cleanup_dir(self._root, PurePath()) + async def cleanup(self) -> None: + await self._cleanup_dir(self._root, PurePath()) - def _cleanup(self, path: Path, pure: PurePath) -> None: + async def _cleanup(self, path: Path, pure: PurePath) -> None: if path.is_dir(): - self._cleanup_dir(path, pure) + await self._cleanup_dir(path, pure) elif path.is_file(): - self._cleanup_file(path, pure) + await self._cleanup_file(path, pure) - def _cleanup_dir(self, path: Path, pure: PurePath) -> None: + async def _cleanup_dir(self, path: Path, pure: PurePath) -> None: for child in path.iterdir(): pure_child = pure / child.name - self._cleanup(child, pure_child) + await self._cleanup(child, pure_child) try: path.rmdir() except OSError: pass - def _cleanup_file(self, path: Path, pure: PurePath) -> None: + 
async def _cleanup_file(self, path: Path, pure: PurePath) -> None: if self._report.marked(pure): return - if self._conflict_delete_lf(self._on_conflict, pure): + if await self._conflict_delete_lf(self._on_conflict, pure): try: path.unlink() self._conductor.print( From 38bb66a776ef18070e7d46f4daeee35acb8c3e36 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 19:40:10 +0200 Subject: [PATCH 100/524] Update file metadata in more cases PFERD now not only updates file metadata when a file is successfully added or changed, but also when a file is downloaded and then detected to be unchanged. This could occur for example if a remote file's modification time was bumped, possibly because somebody touched the file without changing it. --- PFERD/output_dir.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 635ee43..571d73d 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -303,6 +303,11 @@ class OutputDirectory: return None + def _update_metadata(self, info: DownloadInfo) -> None: + if mtime := info.heuristics.mtime: + mtimestamp = mtime.timestamp() + os.utime(info.local_path, times=(mtimestamp, mtimestamp)) + async def _after_download(self, info: DownloadInfo) -> None: changed = False @@ -314,6 +319,7 @@ class OutputDirectory: if info.local_path.exists(): changed = True if filecmp.cmp(info.local_path, info.tmp_path): + self._update_metadata(info) info.tmp_path.unlink() return @@ -321,15 +327,8 @@ class OutputDirectory: info.tmp_path.unlink() return - # Modify metadata if necessary - if mtime := info.heuristics.mtime: - # TODO Pick an implementation - # Rounding up to avoid inaccuracies in how the OS stores timestamps - # mtimestamp = math.ceil(mtime.timestamp()) - mtimestamp = mtime.timestamp() - os.utime(info.tmp_path, times=(mtimestamp, mtimestamp)) - info.tmp_path.replace(info.local_path) + self._update_metadata(info) if changed: self._conductor.print( From 
94d6a01ccab6c58144e864fea7b8e77ada6a61a4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 19:42:40 +0200 Subject: [PATCH 101/524] Use file mtime in local crawler --- PFERD/crawlers/local.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index e80472e..fb08cc9 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -1,4 +1,5 @@ import asyncio +import datetime from pathlib import Path, PurePath from ..conductor import TerminalConductor @@ -48,12 +49,14 @@ class LocalCrawler(Crawler): async def _crawl_file(self, path: Path, pure: PurePath) -> None: async with self.download_bar(path) as bar: - bar.set_total(path.stat().st_size) - - dl = await self.download(pure) + stat = path.stat() + mtime = datetime.datetime.fromtimestamp(stat.st_mtime) + dl = await self.download(pure, mtime=mtime) if not dl: return + bar.set_total(stat.st_size) + async with dl as sink: with open(path, "rb") as f: while True: From e3ee4e515df08d0a7abeced81ef0cf4468abea6d Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 19:47:44 +0200 Subject: [PATCH 102/524] Disable highlighting of primitives This commit prevents rich from highlighting python-looking syntax like numbers, arrays, 'None' etc. 
--- PFERD/conductor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/PFERD/conductor.py b/PFERD/conductor.py index 4648e77..5022a22 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -3,6 +3,7 @@ from contextlib import asynccontextmanager, contextmanager from types import TracebackType from typing import AsyncIterator, Iterator, List, Optional, Type +from rich.console import Console from rich.progress import Progress, TaskID @@ -22,9 +23,11 @@ class TerminalConductor: def __init__(self) -> None: self._stopped = False self._lock = asyncio.Lock() - self._progress = Progress() self._lines: List[str] = [] + self._console = Console(highlight=False) + self._progress = Progress(console=self._console) + async def _start(self) -> None: for task in self._progress.tasks: task.visible = True @@ -61,7 +64,7 @@ class TerminalConductor: if self._stopped: self._lines.append(line) else: - self._progress.console.print(line) + self._console.print(line) @asynccontextmanager async def exclusive_output(self) -> AsyncIterator[None]: From 961f40f9a10d126a7c9a241b29dfcfac3b9ede10 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 19:55:04 +0200 Subject: [PATCH 103/524] Document simple authenticator --- CONFIG.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 16c8531..92c36ae 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -104,11 +104,18 @@ authenticators is `type`: This crawler crawls a local directory. It is really simple and mostly useful for testing different setups. -- `path`: Path to the local directory to crawl. (No default, must be specified) +- `path`: Path to the local directory to crawl. (Required) ## Authenticator types -TODO Fill in as authenticators are implemented +### The `simple` authenticator + +With this authenticator, the username and password can be set directly in the +config file. If the username or password are not specified, the user is prompted +via the terminal. 
+ +- `username`: The username (Optional) +- `password`: The password (Optional) ## Transformation rules From d565df27b31f5a7e635edc6d069d80cf65b1c3ef Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 22:28:14 +0200 Subject: [PATCH 104/524] Add HttpCrawler --- PFERD/crawler.py | 37 +++++++++++++++++++++++++++++++++++++ PFERD/output_dir.py | 5 ++++- PFERD/report.py | 12 ++++++++---- setup.cfg | 2 ++ 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index da35801..feb3f25 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -6,6 +6,7 @@ from pathlib import Path, PurePath from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, Callable, Dict, Optional, TypeVar) +import aiohttp from rich.markup import escape from .authenticator import Authenticator @@ -263,3 +264,39 @@ class Crawler(ABC): """ pass + + +class HttpCrawler(Crawler): + COOKIE_FILE = PurePath(".cookies") + + def __init__( + self, + name: str, + section: CrawlerSection, + config: Config, + conductor: TerminalConductor, + ) -> None: + super().__init__(name, section, config, conductor) + + self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE) + self._output_dir.register_reserved(self.COOKIE_FILE) + + async def run(self) -> None: + cookie_jar = aiohttp.CookieJar() + + try: + cookie_jar.load(self._cookie_jar_path) + except Exception: + pass + + async with aiohttp.ClientSession(cookie_jar=cookie_jar) as session: + self.session = session + try: + await super().run() + finally: + del self.session + + try: + cookie_jar.save(self._cookie_jar_path) + except Exception: + self.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 571d73d..1be9a16 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -86,6 +86,9 @@ class OutputDirectory: self._report = Report() + def register_reserved(self, path: PurePath): + 
self._report.mark_reserved(path) + def _mark(self, path: PurePath) -> None: """ May throw an OutputDirException @@ -100,7 +103,7 @@ class OutputDirectory: msg = f"Collides with other file: {e.collides_with}" raise OutputDirException(msg) - def _resolve(self, path: PurePath) -> Path: + def resolve(self, path: PurePath) -> Path: """ May throw an OutputDirException. """ diff --git a/PFERD/report.py b/PFERD/report.py index b98c90c..2c7d8af 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -44,12 +44,16 @@ class Report: """ def __init__(self) -> None: + self.reserved_files: Set[PurePath] = set() self.known_files: Set[PurePath] = set() self.new_files: Set[PurePath] = set() self.changed_files: Set[PurePath] = set() self.deleted_files: Set[PurePath] = set() + def mark_reserved(self, path: PurePath) -> None: + self.reserved_files.add(path) + def mark(self, path: PurePath) -> None: """ Mark a previously unknown file as known. @@ -58,12 +62,12 @@ class Report: detail, see the respective exception's docstring. 
""" - for known_path in self.known_files: - if path == known_path: + for other in self.known_files & self.reserved_files: + if path == other: raise MarkDuplicateException(path) - if is_relative_to(path, known_path) or is_relative_to(known_path, path): - raise MarkConflictException(path, known_path) + if is_relative_to(path, other) or is_relative_to(other, path): + raise MarkConflictException(path, other) self.known_files.add(path) diff --git a/setup.cfg b/setup.cfg index 1c6e764..9dcb111 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,6 +6,8 @@ version = 3.0.0 packages = PFERD python_requires = >=3.8 install_requires = + aiohttp>=3.7.4.post0 + beautifulsoup4>=4.9.3 rich>=10.1.0 [options.entry_points] From 93a5a94dab50e916ed13d28b55d5ba584a288b3d Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 13 May 2021 23:52:46 +0200 Subject: [PATCH 105/524] Single-source version number --- PFERD/__init__.py | 43 ------------------------------------------- PFERD/__main__.py | 43 +++++++++++++++++++++++++++++++++++++++++++ PFERD/version.py | 1 + setup.cfg | 4 ++-- 4 files changed, 46 insertions(+), 45 deletions(-) create mode 100644 PFERD/__main__.py create mode 100644 PFERD/version.py diff --git a/PFERD/__init__.py b/PFERD/__init__.py index a16b19b..e69de29 100644 --- a/PFERD/__init__.py +++ b/PFERD/__init__.py @@ -1,43 +0,0 @@ -import argparse -import asyncio -from pathlib import Path - -from .config import Config, ConfigDumpException, ConfigLoadException -from .pferd import Pferd - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument( - "--config", "-c", - type=Path, - metavar="PATH", - help="specify custom config file path", - ) - parser.add_argument( - "--dump-config", - nargs="?", - const=True, - type=Path, - metavar="PATH", - help="dump current configuration to a file and exit." 
- " Uses default config file path if no path is specified", - ) - args = parser.parse_args() - - try: - config_parser = Config.load_parser(args.config) - config = Config(config_parser) - except ConfigLoadException: - exit(1) - - if args.dump_config: - path = None if args.dump_config is True else args.dump_config - try: - config.dump(path) - except ConfigDumpException: - exit(1) - exit() - - pferd = Pferd(config) - asyncio.run(pferd.run()) diff --git a/PFERD/__main__.py b/PFERD/__main__.py new file mode 100644 index 0000000..a16b19b --- /dev/null +++ b/PFERD/__main__.py @@ -0,0 +1,43 @@ +import argparse +import asyncio +from pathlib import Path + +from .config import Config, ConfigDumpException, ConfigLoadException +from .pferd import Pferd + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + "--config", "-c", + type=Path, + metavar="PATH", + help="specify custom config file path", + ) + parser.add_argument( + "--dump-config", + nargs="?", + const=True, + type=Path, + metavar="PATH", + help="dump current configuration to a file and exit." 
+ " Uses default config file path if no path is specified", + ) + args = parser.parse_args() + + try: + config_parser = Config.load_parser(args.config) + config = Config(config_parser) + except ConfigLoadException: + exit(1) + + if args.dump_config: + path = None if args.dump_config is True else args.dump_config + try: + config.dump(path) + except ConfigDumpException: + exit(1) + exit() + + pferd = Pferd(config) + asyncio.run(pferd.run()) diff --git a/PFERD/version.py b/PFERD/version.py new file mode 100644 index 0000000..528787c --- /dev/null +++ b/PFERD/version.py @@ -0,0 +1 @@ +__version__ = "3.0.0" diff --git a/setup.cfg b/setup.cfg index 9dcb111..f2806e2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = PFERD -version = 3.0.0 +version = attr: PFERD.version.__version__ [options] packages = PFERD @@ -12,4 +12,4 @@ install_requires = [options.entry_points] console_scripts = - pferd = PFERD:main + pferd = PFERD.__main__:main From 6e5fdf4e9ee05eb22345a895056509fbdfaa9dda Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 14 May 2021 00:09:58 +0200 Subject: [PATCH 106/524] Set user agent to "pferd/" --- PFERD/crawler.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index feb3f25..ece62c1 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -15,6 +15,7 @@ from .config import Config, Section from .limiter import Limiter from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload from .transformer import RuleParseException, Transformer +from .version import __version__ class CrawlerLoadException(Exception): @@ -289,7 +290,10 @@ class HttpCrawler(Crawler): except Exception: pass - async with aiohttp.ClientSession(cookie_jar=cookie_jar) as session: + async with aiohttp.ClientSession( + headers={"User-Agent": f"pferd/{__version__}"}, + cookie_jar=cookie_jar, + ) as session: self.session = session try: await super().run() @@ -299,4 +303,7 @@ class HttpCrawler(Crawler): 
try: cookie_jar.save(self._cookie_jar_path) except Exception: - self.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") + self.print( + "[bold red]Warning:[/] Failed to save cookies to " + + escape(str(self.COOKIE_FILE)) + ) From a673ab0fae35c926c4f24e1c117fa0716b704d0b Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 14 May 2021 00:20:59 +0200 Subject: [PATCH 107/524] Delete old files I should've done this earlier --- PFERD/authenticators.py | 214 ----------- PFERD/cookie_jar.py | 69 ---- PFERD/diva.py | 169 --------- PFERD/download_summary.py | 75 ---- PFERD/downloaders.py | 72 ---- PFERD/errors.py | 57 --- PFERD/ilias/__init__.py | 10 - PFERD/ilias/authenticators.py | 138 ------- PFERD/ilias/crawler.py | 684 ---------------------------------- PFERD/ilias/date_demangler.py | 51 --- PFERD/ilias/downloader.py | 173 --------- PFERD/ipd.py | 154 -------- PFERD/location.py | 41 -- PFERD/logging.py | 184 --------- PFERD/organizer.py | 224 ----------- PFERD/progress.py | 111 ------ PFERD/tmp_dir.py | 79 ---- PFERD/transform.py | 142 ------- 18 files changed, 2647 deletions(-) delete mode 100644 PFERD/authenticators.py delete mode 100644 PFERD/cookie_jar.py delete mode 100644 PFERD/diva.py delete mode 100644 PFERD/download_summary.py delete mode 100644 PFERD/downloaders.py delete mode 100644 PFERD/errors.py delete mode 100644 PFERD/ilias/__init__.py delete mode 100644 PFERD/ilias/authenticators.py delete mode 100644 PFERD/ilias/crawler.py delete mode 100644 PFERD/ilias/date_demangler.py delete mode 100644 PFERD/ilias/downloader.py delete mode 100644 PFERD/ipd.py delete mode 100644 PFERD/location.py delete mode 100644 PFERD/logging.py delete mode 100644 PFERD/organizer.py delete mode 100644 PFERD/progress.py delete mode 100644 PFERD/tmp_dir.py delete mode 100644 PFERD/transform.py diff --git a/PFERD/authenticators.py b/PFERD/authenticators.py deleted file mode 100644 index f85c9d3..0000000 --- a/PFERD/authenticators.py +++ /dev/null 
@@ -1,214 +0,0 @@ -""" -General authenticators useful in many situations -""" - -import getpass -import logging -from typing import Optional, Tuple - -from .logging import PrettyLogger - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - -try: - import keyring -except ImportError: - pass - - -class TfaAuthenticator: - # pylint: disable=too-few-public-methods - """ - An authenticator for a TFA token. Always prompts the user, as the token can not be cached. - """ - - def __init__(self, reason: str): - """ - Create a new tfa authenticator. - - Arguments: - reason {str} -- the reason for obtaining the credentials - """ - self._reason = reason - - def get_token(self) -> str: - # pylint: disable=no-self-use - """ - Prompts the user for the token and returns it. - """ - print(f"Enter credentials ({self._reason})") - return getpass.getpass("TFA Token: ") - - -class UserPassAuthenticator: - """ - An authenticator for username-password combinations that prompts the user - for missing information. - """ - - def __init__( - self, - reason: str, - username: Optional[str] = None, - password: Optional[str] = None, - ) -> None: - """ - reason - what the credentials are used for - username - the username (if already known) - password - the password (if already known) - """ - - self._reason = reason - - self._given_username = username - self._given_password = password - - self._username = username - self._password = password - - def get_credentials(self) -> Tuple[str, str]: - """ - Returns a tuple (username, password). Prompts user for username or - password when necessary. 
- """ - - if self._username is None and self._given_username is not None: - self._username = self._given_username - - if self._password is None and self._given_password is not None: - self._password = self._given_password - - if self._username is None or self._password is None: - print(f"Enter credentials ({self._reason})") - - username: str - if self._username is None: - username = input("Username: ") - self._username = username - else: - username = self._username - - password: str - if self._password is None: - password = getpass.getpass(prompt="Password: ") - self._password = password - else: - password = self._password - - return (username, password) - - @property - def username(self) -> str: - """ - The username. Accessing this property may cause the authenticator to - prompt the user. - """ - - (username, _) = self.get_credentials() - return username - - @property - def password(self) -> str: - """ - The password. Accessing this property may cause the authenticator to - prompt the user. - """ - - (_, password) = self.get_credentials() - return password - - def invalidate_credentials(self) -> None: - """ - Marks the credentials as invalid. If only a username was supplied in - the constructor, assumes that the username is valid and only the - password is invalid. If only a password was supplied in the - constructor, assumes that the password is valid and only the username - is invalid. Otherwise, assumes that username and password are both - invalid. - """ - - self._username = None - self._password = None - - if self._given_username is not None and self._given_password is not None: - self._given_username = None - self._given_password = None - - -class KeyringAuthenticator(UserPassAuthenticator): - """ - An authenticator for username-password combinations that stores the - password using the system keyring service and prompts the user for missing - information. - """ - - def get_credentials(self) -> Tuple[str, str]: - """ - Returns a tuple (username, password). 
Prompts user for username or - password when necessary. - """ - - if self._username is None and self._given_username is not None: - self._username = self._given_username - - if self._password is None and self._given_password is not None: - self._password = self._given_password - - if self._username is not None and self._password is None: - self._load_password() - - if self._username is None or self._password is None: - print(f"Enter credentials ({self._reason})") - - username: str - if self._username is None: - username = input("Username: ") - self._username = username - else: - username = self._username - - if self._password is None: - self._load_password() - - password: str - if self._password is None: - password = getpass.getpass(prompt="Password: ") - self._password = password - self._save_password() - else: - password = self._password - - return (username, password) - - def _load_password(self) -> None: - """ - Loads the saved password associated with self._username from the system - keyring service (or None if not password has been saved yet) and stores - it in self._password. - """ - self._password = keyring.get_password("pferd-ilias", self._username) - - def _save_password(self) -> None: - """ - Saves self._password to the system keyring service and associates it - with self._username. - """ - keyring.set_password("pferd-ilias", self._username, self._password) - - def invalidate_credentials(self) -> None: - """ - Marks the credentials as invalid. If only a username was supplied in - the constructor, assumes that the username is valid and only the - password is invalid. If only a password was supplied in the - constructor, assumes that the password is valid and only the username - is invalid. Otherwise, assumes that username and password are both - invalid. 
- """ - - try: - keyring.delete_password("pferd-ilias", self._username) - except keyring.errors.PasswordDeleteError: - pass - - super().invalidate_credentials() diff --git a/PFERD/cookie_jar.py b/PFERD/cookie_jar.py deleted file mode 100644 index e5b568f..0000000 --- a/PFERD/cookie_jar.py +++ /dev/null @@ -1,69 +0,0 @@ -"""A helper for requests cookies.""" - -import logging -from http.cookiejar import LoadError, LWPCookieJar -from pathlib import Path -from typing import Optional - -import requests - -LOGGER = logging.getLogger(__name__) - - -class CookieJar: - """A cookie jar that can be persisted.""" - - def __init__(self, cookie_file: Optional[Path] = None) -> None: - """Create a new cookie jar at the given path. - - If the path is None, the cookies will not be persisted. - """ - self._cookies: LWPCookieJar - if cookie_file is None: - self._cookies = LWPCookieJar() - else: - self._cookies = LWPCookieJar(str(cookie_file.resolve())) - - @property - def cookies(self) -> LWPCookieJar: - """Return the requests cookie jar.""" - return self._cookies - - def load_cookies(self) -> None: - """Load all cookies from the file given in the constructor.""" - if self._cookies.filename is None: - return - - try: - LOGGER.info("Loading old cookies from %s", self._cookies.filename) - self._cookies.load(ignore_discard=True) - except (FileNotFoundError, LoadError): - LOGGER.warning( - "No valid cookie file found at %s, continuing with no cookies", - self._cookies.filename - ) - - def save_cookies(self, reason: Optional[str] = None) -> None: - """Save the cookies in the file given in the constructor.""" - if self._cookies.filename is None: - return - - if reason is None: - LOGGER.info("Saving cookies") - else: - LOGGER.info("Saving cookies (%s)", reason) - - # TODO figure out why ignore_discard is set - # TODO possibly catch a few more exceptions - self._cookies.save(ignore_discard=True) - - def create_session(self) -> requests.Session: - """Create a new session using the cookie 
jar.""" - sess = requests.Session() - - # From the request docs: "All requests code should work out of the box - # with externally provided instances of CookieJar, e.g. LWPCookieJar - # and FileCookieJar." - sess.cookies = self.cookies # type: ignore - - return sess diff --git a/PFERD/diva.py b/PFERD/diva.py deleted file mode 100644 index 148fa56..0000000 --- a/PFERD/diva.py +++ /dev/null @@ -1,169 +0,0 @@ -""" -Utility functions and a scraper/downloader for the KIT DIVA portal. -""" -import logging -import re -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Callable, List, Optional - -import requests - -from .errors import FatalException -from .logging import PrettyLogger -from .organizer import Organizer -from .tmp_dir import TmpDir -from .transform import Transformable -from .utils import stream_to_path - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - - -@dataclass -class DivaDownloadInfo(Transformable): - """ - Information about a DIVA video - """ - url: str - - -DivaDownloadStrategy = Callable[[Organizer, DivaDownloadInfo], bool] - - -def diva_download_new(organizer: Organizer, info: DivaDownloadInfo) -> bool: - """ - Accepts only new files. - """ - resolved_file = organizer.resolve(info.path) - if not resolved_file.exists(): - return True - PRETTY.ignored_file(info.path, "local file exists") - return False - - -class DivaPlaylistCrawler: - # pylint: disable=too-few-public-methods - """ - A crawler for DIVA playlists. - """ - - _PLAYLIST_BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/detail/" - _COLLECTION_BASE_URL = "https://mediaservice.bibliothek.kit.edu/asset/collection.json" - - def __init__(self, playlist_id: str): - self._id = playlist_id - - @classmethod - def fetch_id(cls, playlist_link: str) -> str: - """ - Fetches the ID for a playerlist, given the base link - (e.g. https://mediaservice.bibliothek.kit.edu/#/details/DIVA-2019-271). 
- - Raises a FatalException, if the id can not be resolved - """ - match = re.match(r".+#/details/(.+)", playlist_link) - if match is None: - raise FatalException( - "DIVA: Invalid playlist link format, could not extract details." - ) - base_name = match.group(1) - - response = requests.get(cls._PLAYLIST_BASE_URL + base_name + ".json") - - if response.status_code != 200: - raise FatalException( - f"DIVA: Got non-200 status code ({response.status_code}))" - f"when requesting {response.url!r}!" - ) - - body = response.json() - - if body["error"]: - raise FatalException(f"DIVA: Server returned error {body['error']!r}.") - - return body["result"]["collection"]["id"] - - def crawl(self) -> List[DivaDownloadInfo]: - """ - Crawls the playlist given in the constructor. - """ - response = requests.get(self._COLLECTION_BASE_URL, params={"collection": self._id}) - if response.status_code != 200: - raise FatalException(f"Server returned status {response.status_code}.") - - body = response.json() - - if body["error"]: - raise FatalException(f"Server returned error {body['error']!r}.") - - result = body["result"] - - if result["resultCount"] > result["pageSize"]: - PRETTY.warning("Did not receive all results, some will be missing") - - download_infos: List[DivaDownloadInfo] = [] - - for video in result["resultList"]: - title = video["title"] - collection_title = self._follow_path(["collection", "title"], video) - url = self._follow_path( - ["resourceList", "derivateList", "mp4", "url"], - video - ) - - if url and collection_title and title: - path = Path(collection_title, title + ".mp4") - download_infos.append(DivaDownloadInfo(path, url)) - else: - PRETTY.warning(f"Incomplete video found: {title!r} {collection_title!r} {url!r}") - - return download_infos - - @staticmethod - def _follow_path(path: List[str], obj: Any) -> Optional[Any]: - """ - Follows a property path through an object, bailing at the first None. 
- """ - current = obj - for path_step in path: - if path_step in current: - current = current[path_step] - else: - return None - return current - - -class DivaDownloader: - """ - A downloader for DIVA videos. - """ - - def __init__(self, tmp_dir: TmpDir, organizer: Organizer, strategy: DivaDownloadStrategy): - self._tmp_dir = tmp_dir - self._organizer = organizer - self._strategy = strategy - self._session = requests.session() - - def download_all(self, infos: List[DivaDownloadInfo]) -> None: - """ - Download multiple files one after the other. - """ - for info in infos: - self.download(info) - - def download(self, info: DivaDownloadInfo) -> None: - """ - Download a single file. - """ - if not self._strategy(self._organizer, info): - self._organizer.mark(info.path) - return - - with self._session.get(info.url, stream=True) as response: - if response.status_code == 200: - tmp_file = self._tmp_dir.new_path() - stream_to_path(response, tmp_file, info.path.name) - self._organizer.accept_file(tmp_file, info.path) - else: - PRETTY.warning(f"Could not download file, got response {response.status_code}") diff --git a/PFERD/download_summary.py b/PFERD/download_summary.py deleted file mode 100644 index 3b9a024..0000000 --- a/PFERD/download_summary.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Provides a summary that keeps track of new modified or deleted files. -""" -from pathlib import Path -from typing import List - - -def _mergeNoDuplicate(first: List[Path], second: List[Path]) -> List[Path]: - tmp = list(set(first + second)) - tmp.sort(key=lambda x: str(x.resolve())) - return tmp - - -class DownloadSummary: - """ - Keeps track of all new, modified or deleted files and provides a summary. - """ - - def __init__(self) -> None: - self._new_files: List[Path] = [] - self._modified_files: List[Path] = [] - self._deleted_files: List[Path] = [] - - @property - def new_files(self) -> List[Path]: - """ - Returns all new files. 
- """ - return self._new_files.copy() - - @property - def modified_files(self) -> List[Path]: - """ - Returns all modified files. - """ - return self._modified_files.copy() - - @property - def deleted_files(self) -> List[Path]: - """ - Returns all deleted files. - """ - return self._deleted_files.copy() - - def merge(self, summary: 'DownloadSummary') -> None: - """ - Merges ourselves with the passed summary. Modifies this object, but not the passed one. - """ - self._new_files = _mergeNoDuplicate(self._new_files, summary.new_files) - self._modified_files = _mergeNoDuplicate(self._modified_files, summary.modified_files) - self._deleted_files = _mergeNoDuplicate(self._deleted_files, summary.deleted_files) - - def add_deleted_file(self, path: Path) -> None: - """ - Registers a file as deleted. - """ - self._deleted_files.append(path) - - def add_modified_file(self, path: Path) -> None: - """ - Registers a file as changed. - """ - self._modified_files.append(path) - - def add_new_file(self, path: Path) -> None: - """ - Registers a file as new. - """ - self._new_files.append(path) - - def has_updates(self) -> bool: - """ - Returns whether this summary has any updates. - """ - return bool(self._new_files or self._modified_files or self._deleted_files) diff --git a/PFERD/downloaders.py b/PFERD/downloaders.py deleted file mode 100644 index 94b8b9f..0000000 --- a/PFERD/downloaders.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -General downloaders useful in many situations -""" - -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional - -import requests -import requests.auth - -from .organizer import Organizer -from .tmp_dir import TmpDir -from .transform import Transformable -from .utils import stream_to_path - - -@dataclass -class HttpDownloadInfo(Transformable): - """ - This class describes a single file to be downloaded. 
- """ - - url: str - parameters: Dict[str, Any] = field(default_factory=dict) - - -class HttpDownloader: - """A HTTP downloader that can handle HTTP basic auth.""" - - def __init__( - self, - tmp_dir: TmpDir, - organizer: Organizer, - username: Optional[str], - password: Optional[str], - ): - """Create a new http downloader.""" - self._organizer = organizer - self._tmp_dir = tmp_dir - self._username = username - self._password = password - self._session = self._build_session() - - def _build_session(self) -> requests.Session: - session = requests.Session() - if self._username and self._password: - session.auth = requests.auth.HTTPBasicAuth( - self._username, self._password - ) - return session - - def download_all(self, infos: List[HttpDownloadInfo]) -> None: - """ - Download multiple files one after the other. - """ - - for info in infos: - self.download(info) - - def download(self, info: HttpDownloadInfo) -> None: - """ - Download a single file. - """ - - with self._session.get(info.url, params=info.parameters, stream=True) as response: - if response.status_code == 200: - tmp_file = self._tmp_dir.new_path() - stream_to_path(response, tmp_file, info.path.name) - self._organizer.accept_file(tmp_file, info.path) - else: - # TODO use proper exception - raise Exception(f"Could not download file, got response {response.status_code}") diff --git a/PFERD/errors.py b/PFERD/errors.py deleted file mode 100644 index d960e13..0000000 --- a/PFERD/errors.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -An error logging decorator. -""" - -import logging -from typing import Any, Callable, TypeVar, cast - -from rich.console import Console - -from .logging import PrettyLogger - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - - -class FatalException(Exception): - """ - A fatal exception occurred. Recovery is not possible. 
- """ - - -TFun = TypeVar('TFun', bound=Callable[..., Any]) - - -def swallow_and_print_errors(function: TFun) -> TFun: - """ - Decorates a function, swallows all errors, logs them and returns none if one occurred. - """ - def inner(*args: Any, **kwargs: Any) -> Any: - # pylint: disable=broad-except - try: - return function(*args, **kwargs) - except FatalException as error: - PRETTY.error(str(error)) - return None - except Exception as error: - Console().print_exception() - return None - return cast(TFun, inner) - - -def retry_on_io_exception(max_retries: int, message: str) -> Callable[[TFun], TFun]: - """ - Decorates a function and retries it on any exception until the max retries count is hit. - """ - def retry(function: TFun) -> TFun: - def inner(*args: Any, **kwargs: Any) -> Any: - for i in range(0, max_retries): - # pylint: disable=broad-except - try: - return function(*args, **kwargs) - except IOError as error: - PRETTY.warning(f"Error duing operation '{message}': {error}") - PRETTY.warning( - f"Retrying operation '{message}'. Remaining retries: {max_retries - 1 - i}") - return cast(TFun, inner) - return retry diff --git a/PFERD/ilias/__init__.py b/PFERD/ilias/__init__.py deleted file mode 100644 index 0a5f08b..0000000 --- a/PFERD/ilias/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -Synchronizing files from ILIAS instances (https://www.ilias.de/). -""" - -from .authenticators import IliasAuthenticator, KitShibbolethAuthenticator -from .crawler import (IliasCrawler, IliasCrawlerEntry, IliasDirectoryFilter, - IliasElementType) -from .downloader import (IliasDownloader, IliasDownloadInfo, - IliasDownloadStrategy, download_everything, - download_modified_or_new) diff --git a/PFERD/ilias/authenticators.py b/PFERD/ilias/authenticators.py deleted file mode 100644 index 4b99dd8..0000000 --- a/PFERD/ilias/authenticators.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -Authenticators that can obtain proper ILIAS session cookies. 
-""" - -import abc -import logging -from typing import Optional - -import bs4 -import requests - -from ..authenticators import TfaAuthenticator, UserPassAuthenticator -from ..utils import soupify - -LOGGER = logging.getLogger(__name__) - - -class IliasAuthenticator(abc.ABC): - # pylint: disable=too-few-public-methods - - """ - An authenticator that logs an existing requests session into an ILIAS - account. - """ - - @abc.abstractmethod - def authenticate(self, sess: requests.Session) -> None: - """ - Log a requests session into this authenticator's ILIAS account. - """ - - -class KitShibbolethAuthenticator(IliasAuthenticator): - # pylint: disable=too-few-public-methods - - """ - Authenticate via KIT's shibboleth system. - """ - - def __init__(self, authenticator: Optional[UserPassAuthenticator] = None) -> None: - if authenticator: - self._auth = authenticator - else: - self._auth = UserPassAuthenticator("KIT ILIAS Shibboleth") - - self._tfa_auth = TfaAuthenticator("KIT ILIAS Shibboleth") - - def authenticate(self, sess: requests.Session) -> None: - """ - Performs the ILIAS Shibboleth authentication dance and saves the login - cookies it receieves. - - This function should only be called whenever it is detected that you're - not logged in. The cookies obtained should be good for a few minutes, - maybe even an hour or two. 
- """ - - # Equivalent: Click on "Mit KIT-Account anmelden" button in - # https://ilias.studium.kit.edu/login.php - LOGGER.debug("Begin authentication process with ILIAS") - url = "https://ilias.studium.kit.edu/Shibboleth.sso/Login" - data = { - "sendLogin": "1", - "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", - "target": "/shib_login.php", - "home_organization_selection": "Mit KIT-Account anmelden", - } - soup = soupify(sess.post(url, data=data)) - - # Attempt to login using credentials, if necessary - while not self._login_successful(soup): - # Searching the form here so that this fails before asking for - # credentials rather than after asking. - form = soup.find("form", {"class": "full content", "method": "post"}) - action = form["action"] - - csrf_token = form.find("input", {"name": "csrf_token"})["value"] - - # Equivalent: Enter credentials in - # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO - LOGGER.debug("Attempt to log in to Shibboleth using credentials") - url = "https://idp.scc.kit.edu" + action - data = { - "_eventId_proceed": "", - "j_username": self._auth.username, - "j_password": self._auth.password, - "csrf_token": csrf_token - } - soup = soupify(sess.post(url, data=data)) - - if self._tfa_required(soup): - soup = self._authenticate_tfa(sess, soup) - - if not self._login_successful(soup): - print("Incorrect credentials.") - self._auth.invalidate_credentials() - - # Equivalent: Being redirected via JS automatically - # (or clicking "Continue" if you have JS disabled) - LOGGER.debug("Redirect back to ILIAS with login information") - relay_state = soup.find("input", {"name": "RelayState"}) - saml_response = soup.find("input", {"name": "SAMLResponse"}) - url = "https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST" - data = { # using the info obtained in the while loop above - "RelayState": relay_state["value"], - "SAMLResponse": saml_response["value"], - } - sess.post(url, data=data) - - def _authenticate_tfa( - self, - 
session: requests.Session, - soup: bs4.BeautifulSoup - ) -> bs4.BeautifulSoup: - # Searching the form here so that this fails before asking for - # credentials rather than after asking. - form = soup.find("form", {"method": "post"}) - action = form["action"] - - # Equivalent: Enter token in - # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO - LOGGER.debug("Attempt to log in to Shibboleth with TFA token") - url = "https://idp.scc.kit.edu" + action - data = { - "_eventId_proceed": "", - "j_tokenNumber": self._tfa_auth.get_token() - } - return soupify(session.post(url, data=data)) - - @staticmethod - def _login_successful(soup: bs4.BeautifulSoup) -> bool: - relay_state = soup.find("input", {"name": "RelayState"}) - saml_response = soup.find("input", {"name": "SAMLResponse"}) - return relay_state is not None and saml_response is not None - - @staticmethod - def _tfa_required(soup: bs4.BeautifulSoup) -> bool: - return soup.find(id="j_tokenNumber") is not None diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py deleted file mode 100644 index edab284..0000000 --- a/PFERD/ilias/crawler.py +++ /dev/null @@ -1,684 +0,0 @@ -""" -Contains an ILIAS crawler alongside helper functions. 
-""" - -import datetime -import json -import logging -import re -from enum import Enum -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Union -from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit, - urlunsplit) - -import bs4 -import requests - -from ..errors import FatalException, retry_on_io_exception -from ..logging import PrettyLogger -from ..utils import soupify -from .authenticators import IliasAuthenticator -from .date_demangler import demangle_date -from .downloader import IliasDownloadInfo - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - - -def _sanitize_path_name(name: str) -> str: - return name.replace("/", "-").replace("\\", "-") - - -class IliasElementType(Enum): - """ - The type of an ilias element. - """ - REGULAR_FOLDER = "REGULAR_FOLDER" - VIDEO_FOLDER = "VIDEO_FOLDER" - EXERCISE_FOLDER = "EXERCISE_FOLDER" - REGULAR_FILE = "REGULAR_FILE" - VIDEO_FILE = "VIDEO_FILE" - FORUM = "FORUM" - MEETING = "MEETING" - EXTERNAL_LINK = "EXTERNAL_LINK" - - def is_folder(self) -> bool: - """ - Returns whether this type is some kind of folder. - """ - return "FOLDER" in str(self.name) - - -IliasDirectoryFilter = Callable[[Path, IliasElementType], bool] - - -class IliasCrawlerEntry: - # pylint: disable=too-few-public-methods - """ - An ILIAS crawler entry used internally to find, catalogue and recursively crawl elements. - """ - - def __init__( - self, - path: Path, - url: Union[str, Callable[[], Optional[str]]], - entry_type: IliasElementType, - modification_date: Optional[datetime.datetime] - ): - self.path = path - if isinstance(url, str): - str_url = url - self.url: Callable[[], Optional[str]] = lambda: str_url - else: - self.url = url - self.entry_type = entry_type - self.modification_date = modification_date - - def to_download_info(self) -> Optional[IliasDownloadInfo]: - """ - Converts this crawler entry to an IliasDownloadInfo, if possible. 
- This method will only succeed for *File* types. - """ - if self.entry_type in [IliasElementType.REGULAR_FILE, IliasElementType.VIDEO_FILE]: - return IliasDownloadInfo(self.path, self.url, self.modification_date) - return None - - -class IliasCrawler: - # pylint: disable=too-few-public-methods - - """ - A crawler for ILIAS. - """ - - # pylint: disable=too-many-arguments - def __init__( - self, - base_url: str, - session: requests.Session, - authenticator: IliasAuthenticator, - dir_filter: IliasDirectoryFilter - ): - """ - Create a new ILIAS crawler. - """ - - self._base_url = base_url - self._session = session - self._authenticator = authenticator - self.dir_filter = dir_filter - - @staticmethod - def _url_set_query_param(url: str, param: str, value: str) -> str: - """ - Set a query parameter in an url, overwriting existing ones with the same name. - """ - scheme, netloc, path, query, fragment = urlsplit(url) - query_parameters = parse_qs(query) - query_parameters[param] = [value] - new_query_string = urlencode(query_parameters, doseq=True) - - return urlunsplit((scheme, netloc, path, new_query_string, fragment)) - - def recursive_crawl_url(self, url: str) -> List[IliasDownloadInfo]: - """ - Crawls a given url *and all reachable elements in it*. - - Args: - url {str} -- the *full* url to crawl - """ - start_entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), url) - return self._iterate_entries_to_download_infos(start_entries) - - def crawl_course(self, course_id: str) -> List[IliasDownloadInfo]: - """ - Starts the crawl process for a course, yielding a list of elements to (potentially) - download. 
- - Arguments: - course_id {str} -- the course id - - Raises: - FatalException: if an unrecoverable error occurs or the course id is not valid - """ - # Start crawling at the given course - root_url = self._url_set_query_param( - self._base_url + "/goto.php", "target", f"crs_{course_id}" - ) - - if not self._is_course_id_valid(root_url, course_id): - raise FatalException( - "Invalid course id? I didn't find anything looking like a course!" - ) - - # And treat it as a folder - entries: List[IliasCrawlerEntry] = self._crawl_folder(Path(""), root_url) - return self._iterate_entries_to_download_infos(entries) - - def _is_course_id_valid(self, root_url: str, course_id: str) -> bool: - response: requests.Response = self._session.get(root_url) - # We were redirected ==> Non-existant ID - if course_id not in response.url: - return False - - link_element: bs4.Tag = self._get_page(root_url, {}).find(id="current_perma_link") - if not link_element: - return False - # It wasn't a course but a category list, forum, etc. - return "crs_" in link_element.get("value") - - def find_course_name(self, course_id: str) -> Optional[str]: - """ - Returns the name of a given course. None if it is not a valid course - or it could not be found. - """ - course_url = self._url_set_query_param( - self._base_url + "/goto.php", "target", f"crs_{course_id}" - ) - return self.find_element_name(course_url) - - def find_element_name(self, url: str) -> Optional[str]: - """ - Returns the name of the element at the given URL, if it can find one. - """ - focus_element: bs4.Tag = self._get_page(url, {}).find(id="il_mhead_t_focus") - if not focus_element: - return None - return focus_element.text - - def crawl_personal_desktop(self) -> List[IliasDownloadInfo]: - """ - Crawls the ILIAS personal desktop (and every subelements that can be reached from there). 
- - Raises: - FatalException: if an unrecoverable error occurs - """ - entries: List[IliasCrawlerEntry] = self._crawl_folder( - Path(""), self._base_url + "?baseClass=ilPersonalDesktopGUI" - ) - return self._iterate_entries_to_download_infos(entries) - - def _iterate_entries_to_download_infos( - self, - entries: List[IliasCrawlerEntry] - ) -> List[IliasDownloadInfo]: - result: List[IliasDownloadInfo] = [] - entries_to_process: List[IliasCrawlerEntry] = entries.copy() - while len(entries_to_process) > 0: - entry = entries_to_process.pop() - - if entry.entry_type == IliasElementType.EXTERNAL_LINK: - PRETTY.not_searching(entry.path, "external link") - continue - if entry.entry_type == IliasElementType.FORUM: - PRETTY.not_searching(entry.path, "forum") - continue - - if entry.entry_type.is_folder() and not self.dir_filter(entry.path, entry.entry_type): - PRETTY.not_searching(entry.path, "user filter") - continue - - download_info = entry.to_download_info() - if download_info is not None: - result.append(download_info) - continue - - url = entry.url() - - if url is None: - PRETTY.warning(f"Could not find url for {str(entry.path)!r}, skipping it") - continue - - PRETTY.searching(entry.path) - - if entry.entry_type == IliasElementType.EXERCISE_FOLDER: - entries_to_process += self._crawl_exercises(entry.path, url) - continue - if entry.entry_type == IliasElementType.REGULAR_FOLDER: - entries_to_process += self._crawl_folder(entry.path, url) - continue - if entry.entry_type == IliasElementType.VIDEO_FOLDER: - entries_to_process += self._crawl_video_directory(entry.path, url) - continue - - PRETTY.warning(f"Unknown type: {entry.entry_type}!") - - return result - - def _crawl_folder(self, folder_path: Path, url: str) -> List[IliasCrawlerEntry]: - """ - Crawl all files in a folder-like element. 
- """ - soup = self._get_page(url, {}) - - if soup.find(id="headerimage"): - element: bs4.Tag = soup.find(id="headerimage") - if "opencast" in element.attrs["src"].lower(): - PRETTY.warning(f"Switched to crawling a video at {folder_path}") - if not self.dir_filter(folder_path, IliasElementType.VIDEO_FOLDER): - PRETTY.not_searching(folder_path, "user filter") - return [] - return self._crawl_video_directory(folder_path, url) - - result: List[IliasCrawlerEntry] = [] - - # Fetch all links and throw them to the general interpreter - links: List[bs4.Tag] = soup.select("a.il_ContainerItemTitle") - for link in links: - abs_url = self._abs_url_from_link(link) - element_path = Path(folder_path, _sanitize_path_name(link.getText().strip())) - element_type = self._find_type_from_link(element_path, link, abs_url) - - if element_type == IliasElementType.REGULAR_FILE: - result += self._crawl_file(folder_path, link, abs_url) - elif element_type == IliasElementType.MEETING: - meeting_name = str(element_path.name) - date_portion_str = meeting_name.split(" - ")[0] - date_portion = demangle_date(date_portion_str) - - if not date_portion: - result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)] - continue - - rest_of_name = meeting_name - if rest_of_name.startswith(date_portion_str): - rest_of_name = rest_of_name[len(date_portion_str):] - - new_name = datetime.datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") \ - + rest_of_name - new_path = Path(folder_path, _sanitize_path_name(new_name)) - result += [ - IliasCrawlerEntry(new_path, abs_url, IliasElementType.REGULAR_FOLDER, None) - ] - elif element_type is not None: - result += [IliasCrawlerEntry(element_path, abs_url, element_type, None)] - else: - PRETTY.warning(f"Found element without a type at {str(element_path)!r}") - - return result - - def _abs_url_from_link(self, link_tag: bs4.Tag) -> str: - """ - Create an absolute url from an tag. 
- """ - return urljoin(self._base_url, link_tag.get("href")) - - @staticmethod - def _find_type_from_link( - path: Path, - link_element: bs4.Tag, - url: str - ) -> Optional[IliasElementType]: - """ - Decides which sub crawler to use for a given top level element. - """ - parsed_url = urlparse(url) - LOGGER.debug("Parsed url: %r", parsed_url) - - # file URLs contain "target=file" - if "target=file_" in parsed_url.query: - return IliasElementType.REGULAR_FILE - - # Skip forums - if "cmd=showThreads" in parsed_url.query: - return IliasElementType.FORUM - - # Everything with a ref_id can *probably* be opened to reveal nested things - # video groups, directories, exercises, etc - if "ref_id=" in parsed_url.query: - return IliasCrawler._find_type_from_folder_like(link_element, url) - - PRETTY.warning( - "Got unknown element type in switch. I am not sure what horror I found on the" - f" ILIAS page. The element was at {str(path)!r} and it is {link_element!r})" - ) - return None - - @staticmethod - def _find_type_from_folder_like(link_element: bs4.Tag, url: str) -> Optional[IliasElementType]: - """ - Try crawling something that looks like a folder. 
- """ - # pylint: disable=too-many-return-statements - - found_parent: Optional[bs4.Tag] = None - - # We look for the outer div of our inner link, to find information around it - # (mostly the icon) - for parent in link_element.parents: - if "ilContainerListItemOuter" in parent["class"]: - found_parent = parent - break - - if found_parent is None: - PRETTY.warning(f"Could not find element icon for {url!r}") - return None - - # Find the small descriptive icon to figure out the type - img_tag: Optional[bs4.Tag] = found_parent.select_one("img.ilListItemIcon") - - if img_tag is None: - PRETTY.warning(f"Could not find image tag for {url!r}") - return None - - if "opencast" in str(img_tag["alt"]).lower(): - return IliasElementType.VIDEO_FOLDER - - if str(img_tag["src"]).endswith("icon_exc.svg"): - return IliasElementType.EXERCISE_FOLDER - - if str(img_tag["src"]).endswith("icon_webr.svg"): - return IliasElementType.EXTERNAL_LINK - - if str(img_tag["src"]).endswith("frm.svg"): - return IliasElementType.FORUM - - if str(img_tag["src"]).endswith("sess.svg"): - return IliasElementType.MEETING - - return IliasElementType.REGULAR_FOLDER - - @staticmethod - def _crawl_file(path: Path, link_element: bs4.Tag, url: str) -> List[IliasCrawlerEntry]: - """ - Crawls a file. - """ - # Files have a list of properties (type, modification date, size, etc.) - # In a series of divs. - # Find the parent containing all those divs, so we can filter our what we need - properties_parent: bs4.Tag = link_element.findParent( - "div", {"class": lambda x: "il_ContainerListItem" in x} - ).select_one(".il_ItemProperties") - # The first one is always the filetype - file_type = properties_parent.select_one("span.il_ItemProperty").getText().strip() - - # The rest does not have a stable order. Grab the whole text and reg-ex the date - # out of it - all_properties_text = properties_parent.getText().strip() - modification_date_match = re.search( - r"(((\d+\. 
\w+ \d+)|(Gestern|Yesterday)|(Heute|Today)|(Morgen|Tomorrow)), \d+:\d+)", - all_properties_text - ) - if modification_date_match is None: - modification_date = None - PRETTY.warning(f"Could not extract start date from {all_properties_text!r}") - else: - modification_date_str = modification_date_match.group(1) - modification_date = demangle_date(modification_date_str) - - # Grab the name from the link text - name = _sanitize_path_name(link_element.getText()) - full_path = Path(path, name + "." + file_type) - - return [ - IliasCrawlerEntry(full_path, url, IliasElementType.REGULAR_FILE, modification_date) - ] - - def _crawl_video_directory(self, video_dir_path: Path, url: str) -> List[IliasCrawlerEntry]: - """ - Crawl the video overview site. - """ - initial_soup = self._get_page(url, {}) - - # The page is actually emtpy but contains a much needed token in the link below. - # That token can be used to fetch the *actual* video listing - content_link: bs4.Tag = initial_soup.select_one("#tab_series a") - # Fetch the actual video listing. 
The given parameters return all videos (max 800) - # in a standalone html page - video_list_soup = self._get_page( - self._abs_url_from_link(content_link), - {"limit": 800, "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} - ) - - # If we find a page selected, we probably need to respect pagination - if self._is_paginated_video_page(video_list_soup): - second_stage_url = self._abs_url_from_link(content_link) - - return self._crawl_paginated_video_directory( - video_dir_path, video_list_soup, second_stage_url - ) - - return self._crawl_video_directory_second_stage(video_dir_path, video_list_soup) - - @staticmethod - def _is_paginated_video_page(soup: bs4.BeautifulSoup) -> bool: - return soup.find(id=re.compile(r"tab_page_sel.+")) is not None - - def _crawl_paginated_video_directory( - self, - video_dir_path: Path, - paged_video_list_soup: bs4.BeautifulSoup, - second_stage_url: str - ) -> List[IliasCrawlerEntry]: - LOGGER.info("Found paginated video page, trying 800 elements") - - # Try to find the table id. This can be used to build the query parameter indicating - # you want 800 elements - - table_element: bs4.Tag = paged_video_list_soup.find( - name="table", id=re.compile(r"tbl_xoct_.+") - ) - if table_element is None: - PRETTY.warning( - "Could not increase elements per page (table not found)." - " Some might not be crawled!" - ) - return self._crawl_video_directory_second_stage(video_dir_path, paged_video_list_soup) - - match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"]) - if match is None: - PRETTY.warning( - "Could not increase elements per page (table id not found)." - " Some might not be crawled!" 
- ) - return self._crawl_video_directory_second_stage(video_dir_path, paged_video_list_soup) - table_id = match.group(1) - - extended_video_page = self._get_page( - second_stage_url, - {f"tbl_xoct_{table_id}_trows": 800, "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} - ) - - if self._is_paginated_video_page(extended_video_page): - PRETTY.warning( - "800 elements do not seem to be enough (or I failed to fetch that many)." - " I will miss elements." - ) - - return self._crawl_video_directory_second_stage(video_dir_path, extended_video_page) - - def _crawl_video_directory_second_stage( - self, - video_dir_path: Path, - video_list_soup: bs4.BeautifulSoup - ) -> List[IliasCrawlerEntry]: - """ - Crawls the "second stage" video page. This page contains the actual video urls. - """ - direct_download_links: List[bs4.Tag] = video_list_soup.findAll( - name="a", text=re.compile(r"\s*Download\s*") - ) - - # Video start links are marked with an "Abspielen" link - video_links: List[bs4.Tag] = video_list_soup.findAll( - name="a", text=re.compile(r"\s*Abspielen\s*") - ) - - results: List[IliasCrawlerEntry] = [] - - # We can download everything directly! - # FIXME: Sadly the download button is currently broken, so never do that - if False and len(direct_download_links) == len(video_links): - for link in direct_download_links: - results += self._crawl_single_video(video_dir_path, link, True) - else: - for link in video_links: - results += self._crawl_single_video(video_dir_path, link, False) - - return results - - def _crawl_single_video( - self, - parent_path: Path, - link: bs4.Tag, - direct_download: bool - ) -> List[IliasCrawlerEntry]: - """ - Crawl a single video based on its "Abspielen" link from the video listing. - """ - # The link is part of a table with multiple columns, describing metadata. 
- # 6th child (1 indexed) is the modification time string - modification_string = link.parent.parent.parent.select_one( - "td.std:nth-child(6)" - ).getText().strip() - modification_time = datetime.datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") - - title = link.parent.parent.parent.select_one( - "td.std:nth-child(3)" - ).getText().strip() - title += ".mp4" - - video_path: Path = Path(parent_path, _sanitize_path_name(title)) - - video_url = self._abs_url_from_link(link) - - # The video had a direct download button we can use instead - if direct_download: - LOGGER.debug("Using direct download for video %r", str(video_path)) - return [IliasCrawlerEntry( - video_path, video_url, IliasElementType.VIDEO_FILE, modification_time - )] - - return [IliasCrawlerEntry( - video_path, - self._crawl_video_url_from_play_link(video_url), - IliasElementType.VIDEO_FILE, - modification_time - )] - - def _crawl_video_url_from_play_link(self, play_url: str) -> Callable[[], Optional[str]]: - def inner() -> Optional[str]: - # Fetch the actual video page. This is a small wrapper page initializing a javscript - # player. Sadly we can not execute that JS. The actual video stream url is nowhere - # on the page, but defined in a JS object inside a script tag, passed to the player - # library. - # We do the impossible and RegEx the stream JSON object out of the page's HTML source - video_page_soup = soupify(self._session.get(play_url)) - regex: re.Pattern = re.compile( - r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE - ) - json_match = regex.search(str(video_page_soup)) - - if json_match is None: - PRETTY.warning(f"Could not find json stream info for {play_url!r}") - return None - json_str = json_match.group(1) - - # parse it - json_object = json.loads(json_str) - # and fetch the video url! 
- video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"] - return video_url - return inner - - def _crawl_exercises(self, element_path: Path, url: str) -> List[IliasCrawlerEntry]: - """ - Crawl files offered for download in exercises. - """ - soup = self._get_page(url, {}) - - results: List[IliasCrawlerEntry] = [] - - # Each assignment is in an accordion container - assignment_containers: List[bs4.Tag] = soup.select(".il_VAccordionInnerContainer") - - for container in assignment_containers: - # Fetch the container name out of the header to use it in the path - container_name = container.select_one(".ilAssignmentHeader").getText().strip() - # Find all download links in the container (this will contain all the files) - files: List[bs4.Tag] = container.findAll( - name="a", - # download links contain the given command class - attrs={"href": lambda x: x and "cmdClass=ilexsubmissiongui" in x}, - text="Download" - ) - - LOGGER.debug("Found exercise container %r", container_name) - - # Grab each file as you now have the link - for file_link in files: - # Two divs, side by side. Left is the name, right is the link ==> get left - # sibling - file_name = file_link.parent.findPrevious(name="div").getText().strip() - file_name = _sanitize_path_name(file_name) - url = self._abs_url_from_link(file_link) - - LOGGER.debug("Found file %r at %r", file_name, url) - - results.append(IliasCrawlerEntry( - Path(element_path, container_name, file_name), - url, - IliasElementType.REGULAR_FILE, - None # We do not have any timestamp - )) - - return results - - @retry_on_io_exception(3, "fetching webpage") - def _get_page(self, url: str, params: Dict[str, Any], - retry_count: int = 0) -> bs4.BeautifulSoup: - """ - Fetches a page from ILIAS, authenticating when needed. - """ - - if retry_count >= 4: - raise FatalException("Could not get a proper page after 4 tries. 
" - "Maybe your URL is wrong, authentication fails continuously, " - "your ILIAS connection is spotty or ILIAS is not well.") - - LOGGER.debug("Fetching %r", url) - - response = self._session.get(url, params=params) - content_type = response.headers["content-type"] - - if not content_type.startswith("text/html"): - raise FatalException( - f"Invalid content type {content_type} when crawling ilias page" - " {url!r} with {params!r}" - ) - - soup = soupify(response) - - if self._is_logged_in(soup): - return soup - - LOGGER.info("Not authenticated, changing that...") - - self._authenticator.authenticate(self._session) - - return self._get_page(url, params, retry_count + 1) - - @staticmethod - def _is_logged_in(soup: bs4.BeautifulSoup) -> bool: - # Normal ILIAS pages - userlog = soup.find("li", {"id": "userlog"}) - if userlog is not None: - LOGGER.debug("Auth: Found #userlog") - return True - # Video listing embeds do not have complete ILIAS html. Try to match them by - # their video listing table - video_table = soup.find( - recursive=True, - name="table", - attrs={"id": lambda x: x is not None and x.startswith("tbl_xoct")} - ) - if video_table is not None: - LOGGER.debug("Auth: Found #tbl_xoct.+") - return True - # The individual video player wrapper page has nothing of the above. - # Match it by its playerContainer. - if soup.select_one("#playerContainer") is not None: - LOGGER.debug("Auth: Found #playerContainer") - return True - return False diff --git a/PFERD/ilias/date_demangler.py b/PFERD/ilias/date_demangler.py deleted file mode 100644 index 2950d4d..0000000 --- a/PFERD/ilias/date_demangler.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -Helper methods to demangle an ILIAS date. 
-""" - -import datetime -import locale -import logging -import re -from typing import Optional - -from ..logging import PrettyLogger - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - - -def demangle_date(date: str) -> Optional[datetime.datetime]: - """ - Demangle a given date in one of the following formats: - "Gestern, HH:MM" - "Heute, HH:MM" - "Morgen, HH:MM" - "dd. mon yyyy, HH:MM - """ - saved = locale.setlocale(locale.LC_ALL) - try: - try: - locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - except locale.Error: - PRETTY.warning( - "Could not set language to german. Assuming you use english everywhere." - ) - - date = re.sub(r"\s+", " ", date) - date = re.sub("Gestern|Yesterday", _yesterday().strftime("%d. %b %Y"), date, re.I) - date = re.sub("Heute|Today", datetime.date.today().strftime("%d. %b %Y"), date, re.I) - date = re.sub("Morgen|Tomorrow", _tomorrow().strftime("%d. %b %Y"), date, re.I) - return datetime.datetime.strptime(date, "%d. %b %Y, %H:%M") - except ValueError: - PRETTY.warning(f"Could not parse date {date!r}") - return None - finally: - locale.setlocale(locale.LC_ALL, saved) - - -def _yesterday() -> datetime.date: - return datetime.date.today() - datetime.timedelta(days=1) - - -def _tomorrow() -> datetime.date: - return datetime.date.today() + datetime.timedelta(days=1) diff --git a/PFERD/ilias/downloader.py b/PFERD/ilias/downloader.py deleted file mode 100644 index f6132bf..0000000 --- a/PFERD/ilias/downloader.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Contains a downloader for ILIAS.""" - -import datetime -import logging -import math -import os -from pathlib import Path, PurePath -from typing import Callable, List, Optional, Union - -import bs4 -import requests - -from ..errors import retry_on_io_exception -from ..logging import PrettyLogger -from ..organizer import Organizer -from ..tmp_dir import TmpDir -from ..transform import Transformable -from ..utils import soupify, stream_to_path -from .authenticators import 
IliasAuthenticator - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - - -class ContentTypeException(Exception): - """Thrown when the content type of the ilias element can not be handled.""" - - -class IliasDownloadInfo(Transformable): - """ - This class describes a single file to be downloaded. - """ - - def __init__( - self, - path: PurePath, - url: Union[str, Callable[[], Optional[str]]], - modifcation_date: Optional[datetime.datetime] - ): - super().__init__(path) - if isinstance(url, str): - string_url = url - self.url: Callable[[], Optional[str]] = lambda: string_url - else: - self.url = url - self.modification_date = modifcation_date - - -IliasDownloadStrategy = Callable[[Organizer, IliasDownloadInfo], bool] - - -def download_everything(organizer: Organizer, info: IliasDownloadInfo) -> bool: - # pylint: disable=unused-argument - """ - Accepts everything. - """ - return True - - -def download_modified_or_new(organizer: Organizer, info: IliasDownloadInfo) -> bool: - """ - Accepts new files or files with a more recent modification date. - """ - resolved_file = organizer.resolve(info.path) - if not resolved_file.exists() or info.modification_date is None: - return True - resolved_mod_time_seconds = resolved_file.stat().st_mtime - - # Download if the info is newer - if info.modification_date.timestamp() > resolved_mod_time_seconds: - return True - - PRETTY.ignored_file(info.path, "local file has newer or equal modification time") - return False - - -class IliasDownloader: - # pylint: disable=too-many-arguments - """A downloader for ILIAS.""" - - def __init__( - self, - tmp_dir: TmpDir, - organizer: Organizer, - session: requests.Session, - authenticator: IliasAuthenticator, - strategy: IliasDownloadStrategy, - timeout: int = 5 - ): - """ - Create a new IliasDownloader. 
- - The timeout applies to the download request only, as bwcloud uses IPv6 - and requests has a problem with that: https://github.com/psf/requests/issues/5522 - """ - - self._tmp_dir = tmp_dir - self._organizer = organizer - self._session = session - self._authenticator = authenticator - self._strategy = strategy - self._timeout = timeout - - def download_all(self, infos: List[IliasDownloadInfo]) -> None: - """ - Download multiple files one after the other. - """ - - for info in infos: - self.download(info) - - def download(self, info: IliasDownloadInfo) -> None: - """ - Download a file from ILIAS. - - Retries authentication until eternity if it could not fetch the file. - """ - - LOGGER.debug("Downloading %r", info) - - if not self._strategy(self._organizer, info): - self._organizer.mark(info.path) - return - - tmp_file = self._tmp_dir.new_path() - - @retry_on_io_exception(3, "downloading file") - def download_impl() -> bool: - if not self._try_download(info, tmp_file): - LOGGER.info("Re-Authenticating due to download failure: %r", info) - self._authenticator.authenticate(self._session) - raise IOError("Scheduled retry") - else: - return True - - if not download_impl(): - PRETTY.error(f"Download of file {info.path} failed too often! 
Skipping it...") - return - - dst_path = self._organizer.accept_file(tmp_file, info.path) - if dst_path and info.modification_date: - os.utime( - dst_path, - times=( - math.ceil(info.modification_date.timestamp()), - math.ceil(info.modification_date.timestamp()) - ) - ) - - def _try_download(self, info: IliasDownloadInfo, target: Path) -> bool: - url = info.url() - if url is None: - PRETTY.warning(f"Could not download {str(info.path)!r} as I got no URL :/") - return True - - with self._session.get(url, stream=True, timeout=self._timeout) as response: - content_type = response.headers["content-type"] - has_content_disposition = "content-disposition" in response.headers - - if content_type.startswith("text/html") and not has_content_disposition: - if self._is_logged_in(soupify(response)): - raise ContentTypeException("Attempting to download a web page, not a file") - - return False - - # Yay, we got the file :) - stream_to_path(response, target, info.path.name) - return True - - @staticmethod - def _is_logged_in(soup: bs4.BeautifulSoup) -> bool: - userlog = soup.find("li", {"id": "userlog"}) - return userlog is not None diff --git a/PFERD/ipd.py b/PFERD/ipd.py deleted file mode 100644 index ece6a97..0000000 --- a/PFERD/ipd.py +++ /dev/null @@ -1,154 +0,0 @@ -""" -Utility functions and a scraper/downloader for the IPD pages. -""" -import datetime -import logging -import math -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, List, Optional -from urllib.parse import urljoin - -import bs4 -import requests - -from PFERD.errors import FatalException -from PFERD.utils import soupify - -from .logging import PrettyLogger -from .organizer import Organizer -from .tmp_dir import TmpDir -from .transform import Transformable -from .utils import stream_to_path - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - - -@dataclass -class IpdDownloadInfo(Transformable): - """ - Information about an ipd entry. 
- """ - url: str - modification_date: Optional[datetime.datetime] - - -IpdDownloadStrategy = Callable[[Organizer, IpdDownloadInfo], bool] - - -def ipd_download_new_or_modified(organizer: Organizer, info: IpdDownloadInfo) -> bool: - """ - Accepts new files or files with a more recent modification date. - """ - resolved_file = organizer.resolve(info.path) - if not resolved_file.exists(): - return True - if not info.modification_date: - PRETTY.ignored_file(info.path, "could not find modification time, file exists") - return False - - resolved_mod_time_seconds = resolved_file.stat().st_mtime - - # Download if the info is newer - if info.modification_date.timestamp() > resolved_mod_time_seconds: - return True - - PRETTY.ignored_file(info.path, "local file has newer or equal modification time") - return False - - -class IpdCrawler: - # pylint: disable=too-few-public-methods - """ - A crawler for IPD pages. - """ - - def __init__(self, base_url: str): - self._base_url = base_url - - def _abs_url_from_link(self, link_tag: bs4.Tag) -> str: - """ - Create an absolute url from an tag. - """ - return urljoin(self._base_url, link_tag.get("href")) - - def crawl(self) -> List[IpdDownloadInfo]: - """ - Crawls the playlist given in the constructor. 
- """ - page = soupify(requests.get(self._base_url)) - - items: List[IpdDownloadInfo] = [] - - def is_relevant_url(x: str) -> bool: - return x.endswith(".pdf") or x.endswith(".c") or x.endswith(".java") or x.endswith(".zip") - - for link in page.findAll(name="a", attrs={"href": lambda x: x and is_relevant_url(x)}): - href: str = link.attrs.get("href") - name = href.split("/")[-1] - - modification_date: Optional[datetime.datetime] = None - try: - enclosing_row: bs4.Tag = link.findParent(name="tr") - if enclosing_row: - date_text = enclosing_row.find(name="td").text - modification_date = datetime.datetime.strptime(date_text, "%d.%m.%Y") - except ValueError: - modification_date = None - - items.append(IpdDownloadInfo( - Path(name), - url=self._abs_url_from_link(link), - modification_date=modification_date - )) - - return items - - -class IpdDownloader: - """ - A downloader for ipd files. - """ - - def __init__(self, tmp_dir: TmpDir, organizer: Organizer, strategy: IpdDownloadStrategy): - self._tmp_dir = tmp_dir - self._organizer = organizer - self._strategy = strategy - self._session = requests.session() - - def download_all(self, infos: List[IpdDownloadInfo]) -> None: - """ - Download multiple files one after the other. - """ - for info in infos: - self.download(info) - - def download(self, info: IpdDownloadInfo) -> None: - """ - Download a single file. - """ - if not self._strategy(self._organizer, info): - self._organizer.mark(info.path) - return - - with self._session.get(info.url, stream=True) as response: - if response.status_code == 200: - tmp_file = self._tmp_dir.new_path() - stream_to_path(response, tmp_file, info.path.name) - dst_path = self._organizer.accept_file(tmp_file, info.path) - - if dst_path and info.modification_date: - os.utime( - dst_path, - times=( - math.ceil(info.modification_date.timestamp()), - math.ceil(info.modification_date.timestamp()) - ) - ) - - elif response.status_code == 403: - raise FatalException("Received 403. 
Are you not using the KIT VPN?") - else: - PRETTY.warning(f"Could not download file, got response {response.status_code}") diff --git a/PFERD/location.py b/PFERD/location.py deleted file mode 100644 index 7f4c8ca..0000000 --- a/PFERD/location.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Contains a Location class for objects with an inherent path. -""" - -from pathlib import Path, PurePath - - -class ResolveException(Exception): - """An exception while resolving a file.""" - # TODO take care of this when doing exception handling - - -class Location: - """ - An object that has an inherent path. - """ - - def __init__(self, path: Path): - self._path = path.resolve() - - @property - def path(self) -> Path: - """ - This object's location. - """ - - return self._path - - def resolve(self, target: PurePath) -> Path: - """ - Resolve a file relative to the path of this location. - - Raises a [ResolveException] if the file is outside the given directory. - """ - absolute_path = self.path.joinpath(target).resolve() - - # TODO Make this less inefficient - if self.path not in absolute_path.parents: - raise ResolveException(f"Path {target} is not inside directory {self.path}") - - return absolute_path diff --git a/PFERD/logging.py b/PFERD/logging.py deleted file mode 100644 index c25019e..0000000 --- a/PFERD/logging.py +++ /dev/null @@ -1,184 +0,0 @@ -""" -Contains a few logger utility functions and implementations. -""" - -import logging -from typing import Optional - -from rich._log_render import LogRender -from rich.console import Console -from rich.style import Style -from rich.text import Text -from rich.theme import Theme - -from .download_summary import DownloadSummary -from .utils import PathLike, to_path - -STYLE = "{" -FORMAT = "[{levelname:<7}] {message}" -DATE_FORMAT = "%F %T" - - -def enable_logging(name: str = "PFERD", level: int = logging.INFO) -> None: - """ - Enable and configure logging via the logging module. 
- """ - - logger = logging.getLogger(name) - logger.setLevel(level) - logger.addHandler(RichLoggingHandler(level=level)) - - # This should be logged by our own handler, and not the root logger's - # default handler, so we don't pass it on to the root logger. - logger.propagate = False - - -class RichLoggingHandler(logging.Handler): - """ - A logging handler that uses rich for highlighting - """ - - def __init__(self, level: int) -> None: - super().__init__(level=level) - self.console = Console(theme=Theme({ - "logging.level.warning": Style(color="yellow") - })) - self._log_render = LogRender(show_level=True, show_time=False, show_path=False) - - def emit(self, record: logging.LogRecord) -> None: - """ - Invoked by logging. - """ - log_style = f"logging.level.{record.levelname.lower()}" - message = self.format(record) - - level = Text() - level.append(record.levelname, log_style) - message_text = Text.from_markup(message) - - self.console.print( - self._log_render( - self.console, - [message_text], - level=level, - ) - ) - - -class PrettyLogger: - """ - A logger that prints some specially formatted log messages in color. - """ - - def __init__(self, logger: logging.Logger) -> None: - self.logger = logger - - @staticmethod - def _format_path(path: PathLike) -> str: - return repr(str(to_path(path))) - - def error(self, message: str) -> None: - """ - Print an error message indicating some operation fatally failed. - """ - self.logger.error( - f"[bold red]{message}[/bold red]" - ) - - def warning(self, message: str) -> None: - """ - Print a warning message indicating some operation failed, but the error can be recovered - or ignored. - """ - self.logger.warning( - f"[bold yellow]{message}[/bold yellow]" - ) - - def modified_file(self, path: PathLike) -> None: - """ - An existing file has changed. 
- """ - - self.logger.info( - f"[bold magenta]Modified {self._format_path(path)}.[/bold magenta]" - ) - - def new_file(self, path: PathLike) -> None: - """ - A new file has been downloaded. - """ - - self.logger.info( - f"[bold green]Created {self._format_path(path)}.[/bold green]" - ) - - def deleted_file(self, path: PathLike) -> None: - """ - A file has been deleted. - """ - - self.logger.info( - f"[bold red]Deleted {self._format_path(path)}.[/bold red]" - ) - - def ignored_file(self, path: PathLike, reason: str) -> None: - """ - File was not downloaded or modified. - """ - - self.logger.info( - f"[dim]Ignored {self._format_path(path)} " - f"([/dim]{reason}[dim]).[/dim]" - ) - - def searching(self, path: PathLike) -> None: - """ - A crawler searches a particular object. - """ - - self.logger.info(f"Searching {self._format_path(path)}") - - def not_searching(self, path: PathLike, reason: str) -> None: - """ - A crawler does not search a particular object. - """ - - self.logger.info( - f"[dim]Not searching {self._format_path(path)} " - f"([/dim]{reason}[dim]).[/dim]" - ) - - def summary(self, download_summary: DownloadSummary) -> None: - """ - Prints a download summary. - """ - self.logger.info("") - self.logger.info("[bold cyan]Download Summary[/bold cyan]") - if not download_summary.has_updates(): - self.logger.info("[bold dim]Nothing changed![/bold dim]") - return - - for new_file in download_summary.new_files: - self.new_file(new_file) - for modified_file in download_summary.modified_files: - self.modified_file(modified_file) - for deleted_files in download_summary.deleted_files: - self.deleted_file(deleted_files) - - def starting_synchronizer( - self, - target_directory: PathLike, - synchronizer_name: str, - subject: Optional[str] = None, - ) -> None: - """ - A special message marking that a synchronizer has been started. 
- """ - - subject_str = f"{subject} " if subject else "" - self.logger.info("") - self.logger.info(( - f"[bold cyan]Synchronizing " - f"{subject_str}to {self._format_path(target_directory)} " - f"using the {synchronizer_name} synchronizer.[/bold cyan]" - )) diff --git a/PFERD/organizer.py b/PFERD/organizer.py deleted file mode 100644 index fe5052b..0000000 --- a/PFERD/organizer.py +++ /dev/null @@ -1,224 +0,0 @@ -"""A simple helper for managing downloaded files. - -A organizer is bound to a single directory. -""" - -import filecmp -import logging -import os -import shutil -from enum import Enum -from pathlib import Path, PurePath -from typing import Callable, List, Optional, Set - -from .download_summary import DownloadSummary -from .location import Location -from .logging import PrettyLogger -from .utils import prompt_yes_no - -LOGGER = logging.getLogger(__name__) -PRETTY = PrettyLogger(LOGGER) - - -class ConflictType(Enum): - """ - The type of the conflict. A file might not exist anymore and will be deleted - or it might be overwritten with a newer version. 
- - FILE_OVERWRITTEN: An existing file will be updated - MARKED_FILE_OVERWRITTEN: A file is written for the second+ time in this run - FILE_DELETED: The file was deleted - """ - FILE_OVERWRITTEN = "overwritten" - MARKED_FILE_OVERWRITTEN = "marked_file_overwritten" - FILE_DELETED = "deleted" - - -class FileConflictResolution(Enum): - """ - The reaction when confronted with a file conflict: - - DESTROY_EXISTING: Delete/overwrite the current file - KEEP_EXISTING: Keep the current file - DEFAULT: Do whatever the PFERD authors thought is sensible - PROMPT: Interactively ask the user - """ - - DESTROY_EXISTING = "destroy" - - KEEP_EXISTING = "keep" - - DEFAULT = "default" - - PROMPT = "prompt" - - -FileConflictResolver = Callable[[PurePath, ConflictType], FileConflictResolution] - - -def resolve_prompt_user(_path: PurePath, conflict: ConflictType) -> FileConflictResolution: - """ - Resolves conflicts by asking the user if a file was written twice or will be deleted. - """ - if conflict == ConflictType.FILE_OVERWRITTEN: - return FileConflictResolution.DESTROY_EXISTING - return FileConflictResolution.PROMPT - - -class FileAcceptException(Exception): - """An exception while accepting a file.""" - - -class Organizer(Location): - """A helper for managing downloaded files.""" - - def __init__(self, path: Path, conflict_resolver: FileConflictResolver = resolve_prompt_user): - """Create a new organizer for a given path.""" - super().__init__(path) - self._known_files: Set[Path] = set() - - # Keep the root dir - self._known_files.add(path.resolve()) - - self.download_summary = DownloadSummary() - - self.conflict_resolver = conflict_resolver - - def accept_file(self, src: Path, dst: PurePath) -> Optional[Path]: - """ - Move a file to this organizer and mark it. - - Returns the path the file was moved to, to allow the caller to adjust the metadata. - As you might still need to adjust the metadata when the file was identical - (e.g. 
update the timestamp), the path is also returned in this case. - In all other cases (ignored, not overwritten, etc.) this method returns None. - """ - # Windows limits the path length to 260 for *some* historical reason - # If you want longer paths, you will have to add the "\\?\" prefix in front of - # your path... - # See: - # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation - if os.name == 'nt': - src_absolute = Path("\\\\?\\" + str(src.resolve())) - dst_absolute = Path("\\\\?\\" + str(self.resolve(dst))) - else: - src_absolute = src.resolve() - dst_absolute = self.resolve(dst) - - if not src_absolute.exists(): - raise FileAcceptException("Source file does not exist") - - if not src_absolute.is_file(): - raise FileAcceptException("Source is a directory") - - LOGGER.debug("Copying %s to %s", src_absolute, dst_absolute) - - if self._is_marked(dst): - PRETTY.warning(f"File {str(dst_absolute)!r} was already written!") - conflict = ConflictType.MARKED_FILE_OVERWRITTEN - if self._resolve_conflict("Overwrite file?", dst_absolute, conflict, default=False): - PRETTY.ignored_file(dst_absolute, "file was written previously") - return None - - # Destination file is directory - if dst_absolute.exists() and dst_absolute.is_dir(): - prompt = f"Overwrite folder {dst_absolute} with file?" - conflict = ConflictType.FILE_OVERWRITTEN - if self._resolve_conflict(prompt, dst_absolute, conflict, default=False): - shutil.rmtree(dst_absolute) - else: - PRETTY.warning(f"Could not add file {str(dst_absolute)!r}") - return None - - # Destination file exists - if dst_absolute.exists() and dst_absolute.is_file(): - if filecmp.cmp(str(src_absolute), str(dst_absolute), shallow=False): - # Bail out, nothing more to do - PRETTY.ignored_file(dst_absolute, "same file contents") - self.mark(dst) - return dst_absolute - - prompt = f"Overwrite file {dst_absolute}?" 
- conflict = ConflictType.FILE_OVERWRITTEN - if not self._resolve_conflict(prompt, dst_absolute, conflict, default=True): - PRETTY.ignored_file(dst_absolute, "user conflict resolution") - return None - - self.download_summary.add_modified_file(dst_absolute) - PRETTY.modified_file(dst_absolute) - else: - self.download_summary.add_new_file(dst_absolute) - PRETTY.new_file(dst_absolute) - - # Create parent dir if needed - dst_parent_dir: Path = dst_absolute.parent - dst_parent_dir.mkdir(exist_ok=True, parents=True) - - # Move file - shutil.move(str(src_absolute), str(dst_absolute)) - - self.mark(dst) - - return dst_absolute - - def mark(self, path: PurePath) -> None: - """Mark a file as used so it will not get cleaned up.""" - absolute_path = self.resolve(path) - self._known_files.add(absolute_path) - LOGGER.debug("Tracked %s", absolute_path) - - def _is_marked(self, path: PurePath) -> bool: - """ - Checks whether a file is marked. - """ - absolute_path = self.resolve(path) - return absolute_path in self._known_files - - def cleanup(self) -> None: - """Remove all untracked files in the organizer's dir.""" - LOGGER.debug("Deleting all untracked files...") - - self._cleanup(self.path) - - def _cleanup(self, start_dir: Path) -> None: - if not start_dir.exists(): - return - paths: List[Path] = list(start_dir.iterdir()) - - # Recursively clean paths - for path in paths: - if path.is_dir(): - self._cleanup(path) - else: - if path.resolve() not in self._known_files: - self._delete_file_if_confirmed(path) - - # Delete dir if it was empty and untracked - dir_empty = len(list(start_dir.iterdir())) == 0 - if start_dir.resolve() not in self._known_files and dir_empty: - start_dir.rmdir() - - def _delete_file_if_confirmed(self, path: Path) -> None: - prompt = f"Do you want to delete {path}" - - if self._resolve_conflict(prompt, path, ConflictType.FILE_DELETED, default=False): - self.download_summary.add_deleted_file(path) - path.unlink() - else: - PRETTY.ignored_file(path, "user 
conflict resolution") - - def _resolve_conflict( - self, prompt: str, path: Path, conflict: ConflictType, default: bool - ) -> bool: - if not self.conflict_resolver: - return prompt_yes_no(prompt, default=default) - - result = self.conflict_resolver(path, conflict) - if result == FileConflictResolution.DEFAULT: - return default - if result == FileConflictResolution.KEEP_EXISTING: - return False - if result == FileConflictResolution.DESTROY_EXISTING: - return True - - return prompt_yes_no(prompt, default=default) diff --git a/PFERD/progress.py b/PFERD/progress.py deleted file mode 100644 index 6ad098f..0000000 --- a/PFERD/progress.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -A small progress bar implementation. -""" -import sys -from dataclasses import dataclass -from types import TracebackType -from typing import Optional, Type - -import requests -from rich.console import Console -from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID, - TextColumn, TimeRemainingColumn, - TransferSpeedColumn) - -_progress: Progress = Progress( - TextColumn("[bold blue]{task.fields[name]}", justify="right"), - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>3.1f}%", - "•", - DownloadColumn(), - "•", - TransferSpeedColumn(), - "•", - TimeRemainingColumn(), - console=Console(file=sys.stdout), - transient=True -) - - -def size_from_headers(response: requests.Response) -> Optional[int]: - """ - Return the size of the download based on the response headers. - - Arguments: - response {requests.Response} -- the response - - Returns: - Optional[int] -- the size - """ - if "Content-Length" in response.headers: - return int(response.headers["Content-Length"]) - return None - - -@dataclass -class ProgressSettings: - """ - Settings you can pass to customize the progress bar. 
- """ - name: str - max_size: int - - -def progress_for(settings: Optional[ProgressSettings]) -> 'ProgressContextManager': - """ - Returns a context manager that displays progress - - Returns: - ProgressContextManager -- the progress manager - """ - return ProgressContextManager(settings) - - -class ProgressContextManager: - """ - A context manager used for displaying progress. - """ - - def __init__(self, settings: Optional[ProgressSettings]): - self._settings = settings - self._task_id: Optional[TaskID] = None - - def __enter__(self) -> 'ProgressContextManager': - """Context manager entry function.""" - if not self._settings: - return self - - _progress.start() - self._task_id = _progress.add_task( - self._settings.name, - total=self._settings.max_size, - name=self._settings.name - ) - return self - - # pylint: disable=useless-return - def __exit__( - self, - exc_type: Optional[Type[BaseException]], - exc_value: Optional[BaseException], - traceback: Optional[TracebackType], - ) -> Optional[bool]: - """Context manager exit function. Removes the task.""" - if self._task_id is None: - return None - - _progress.remove_task(self._task_id) - - if len(_progress.task_ids) == 0: - # We need to clean up after ourselves, as we were the last one - _progress.stop() - _progress.refresh() - - return None - - def advance(self, amount: float) -> None: - """ - Advances the progress bar. 
- """ - if self._task_id is not None: - _progress.advance(self._task_id, amount) diff --git a/PFERD/tmp_dir.py b/PFERD/tmp_dir.py deleted file mode 100644 index 51ade2d..0000000 --- a/PFERD/tmp_dir.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Helper functions and classes for temporary folders.""" - -import logging -import shutil -from pathlib import Path -from types import TracebackType -from typing import Optional, Type - -from .location import Location - -LOGGER = logging.getLogger(__name__) - - -class TmpDir(Location): - """A temporary folder that can create files or nested temp folders.""" - - def __init__(self, path: Path): - """Create a new temporary folder for the given path.""" - super().__init__(path) - self._counter = 0 - self.cleanup() - self.path.mkdir(parents=True, exist_ok=True) - - def __str__(self) -> str: - """Format the folder as a string.""" - return f"Folder at {self.path}" - - def __enter__(self) -> 'TmpDir': - """Context manager entry function.""" - return self - - # pylint: disable=useless-return - def __exit__( - self, - exc_type: Optional[Type[BaseException]], - exc_value: Optional[BaseException], - traceback: Optional[TracebackType], - ) -> Optional[bool]: - """Context manager exit function. Calls cleanup().""" - self.cleanup() - return None - - def new_path(self, prefix: Optional[str] = None) -> Path: - """ - Return a unique path inside the directory. Doesn't create a file or - directory. - """ - - name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}" - - LOGGER.debug("Creating temp file %s", name) - - return self.resolve(Path(name)) - - def new_subdir(self, prefix: Optional[str] = None) -> 'TmpDir': - """ - Create a new nested temporary folder and return it. 
- """ - - name = f"{prefix if prefix else 'tmp'}-{self._inc_and_get_counter():03}" - sub_path = self.resolve(Path(name)) - sub_path.mkdir(parents=True) - - LOGGER.debug("Creating temp dir %s at %s", name, sub_path) - - return TmpDir(sub_path) - - def cleanup(self) -> None: - """Delete this folder and all contained files.""" - LOGGER.debug("Deleting temp folder %s", self.path) - - if self.path.resolve().exists(): - shutil.rmtree(self.path.resolve()) - - def _inc_and_get_counter(self) -> int: - """Get and increment the counter by one.""" - counter = self._counter - self._counter += 1 - return counter diff --git a/PFERD/transform.py b/PFERD/transform.py deleted file mode 100644 index a2152ba..0000000 --- a/PFERD/transform.py +++ /dev/null @@ -1,142 +0,0 @@ -""" -Transforms let the user define functions to decide where the downloaded files -should be placed locally. They let the user do more advanced things like moving -only files whose names match a regex, or renaming files from one numbering -scheme to another. -""" - -import os -import re -from dataclasses import dataclass -from pathlib import PurePath -from typing import Callable, List, Optional, TypeVar - -from .utils import PathLike, Regex, to_path, to_pattern - -Transform = Callable[[PurePath], Optional[PurePath]] - - -@dataclass -class Transformable: - """ - An object that can be transformed by a Transform. - """ - - path: PurePath - - -TF = TypeVar("TF", bound=Transformable) - - -def apply_transform( - transform: Transform, - transformables: List[TF], -) -> List[TF]: - """ - Apply a Transform to multiple Transformables, discarding those that were - not transformed by the Transform. 
- """ - - result: List[TF] = [] - for transformable in transformables: - new_path = transform(transformable.path) - if new_path: - transformable.path = new_path - result.append(transformable) - return result - -# Transform combinators - -def keep(path: PurePath) -> Optional[PurePath]: - return path - -def attempt(*args: Transform) -> Transform: - def inner(path: PurePath) -> Optional[PurePath]: - for transform in args: - result = transform(path) - if result: - return result - return None - return inner - -def optionally(transform: Transform) -> Transform: - return attempt(transform, lambda path: path) - -def do(*args: Transform) -> Transform: - def inner(path: PurePath) -> Optional[PurePath]: - current = path - for transform in args: - result = transform(current) - if result: - current = result - else: - return None - return current - return inner - -def predicate(pred: Callable[[PurePath], bool]) -> Transform: - def inner(path: PurePath) -> Optional[PurePath]: - if pred(path): - return path - return None - return inner - -def glob(pattern: str) -> Transform: - return predicate(lambda path: path.match(pattern)) - -def move_dir(source_dir: PathLike, target_dir: PathLike) -> Transform: - source_path = to_path(source_dir) - target_path = to_path(target_dir) - def inner(path: PurePath) -> Optional[PurePath]: - if source_path in path.parents: - return target_path / path.relative_to(source_path) - return None - return inner - -def move(source: PathLike, target: PathLike) -> Transform: - source_path = to_path(source) - target_path = to_path(target) - def inner(path: PurePath) -> Optional[PurePath]: - if path == source_path: - return target_path - return None - return inner - -def rename(source: str, target: str) -> Transform: - def inner(path: PurePath) -> Optional[PurePath]: - if path.name == source: - return path.with_name(target) - return None - return inner - -def re_move(regex: Regex, target: str) -> Transform: - def inner(path: PurePath) -> Optional[PurePath]: - 
match = to_pattern(regex).fullmatch(str(path)) - if match: - groups = [match.group(0)] - groups.extend(match.groups()) - return PurePath(target.format(*groups)) - return None - return inner - -def re_rename(regex: Regex, target: str) -> Transform: - def inner(path: PurePath) -> Optional[PurePath]: - match = to_pattern(regex).fullmatch(path.name) - if match: - groups = [match.group(0)] - groups.extend(match.groups()) - return path.with_name(target.format(*groups)) - return None - return inner - - -def sanitize_windows_path(path: PurePath) -> PurePath: - """ - A small function to escape characters that are forbidden in windows path names. - This method is a no-op on other operating systems. - """ - # Escape windows illegal path characters - if os.name == 'nt': - sanitized_parts = [re.sub(r'[<>:"/|?]', "_", x) for x in list(path.parts)] - return PurePath(*sanitized_parts) - return path From 0c9167512c7345a54c60f493fd574a56c43800e1 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 14 May 2021 21:28:38 +0200 Subject: [PATCH 108/524] Fix output dir I missed these while renaming the resolve function. Shame on me for not running mypy earlier. 
--- PFERD/output_dir.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 1be9a16..89c5839 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -86,7 +86,7 @@ class OutputDirectory: self._report = Report() - def register_reserved(self, path: PurePath): + def register_reserved(self, path: PurePath) -> None: self._report.mark_reserved(path) def _mark(self, path: PurePath) -> None: @@ -265,7 +265,7 @@ class OutputDirectory: heuristics = Heuristics(mtime) redownload = self._redownload if redownload is None else redownload on_conflict = self._on_conflict if on_conflict is None else on_conflict - local_path = self._resolve(path) + local_path = self.resolve(path) self._mark(path) @@ -281,7 +281,7 @@ class OutputDirectory: # Detect and solve local-file-remote-dir conflict for parent in path.parents: - local_parent = self._resolve(parent) + local_parent = self.resolve(parent) if local_parent.exists() and not local_parent.is_dir(): if await self._conflict_lfrd(on_conflict, path, parent): local_parent.unlink() From 1591cb9197e3e5e6b8b11a572543aa231d8a2653 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 14 May 2021 21:41:24 +0200 Subject: [PATCH 109/524] Add options to slow down local crawler These options are meant to make the local crawler behave more like a network-based crawler for purposes of testing and debugging other parts of the code base. --- CONFIG.md | 12 ++++++-- PFERD/conductor.py | 1 + PFERD/crawlers/local.py | 61 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 65 insertions(+), 9 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 92c36ae..2cac906 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -102,9 +102,15 @@ authenticators is `type`: ### The `local` crawler This crawler crawls a local directory. It is really simple and mostly useful for -testing different setups. +testing different setups. 
The various delay options are meant to make the +crawler simulate a slower, network-based crawler. - `path`: Path to the local directory to crawl. (Required) +- `crawl_delay`: Maximum artificial delay (in seconds) to simulate for crawl + requests. (Optional) +- `download_delay`: Maximum artificial delay (in seconds) to simulate for + download requests. (Optional) +- `download_speed`: Download speed (in bytes per second) to simulate. (Optional) ## Authenticator types @@ -114,8 +120,8 @@ With this authenticator, the username and password can be set directly in the config file. If the username or password are not specified, the user is prompted via the terminal. -- `username`: The username (Optional) -- `password`: The password (Optional) +- `username`: The username. (Optional) +- `password`: The password. (Optional) ## Transformation rules diff --git a/PFERD/conductor.py b/PFERD/conductor.py index 5022a22..d50574e 100644 --- a/PFERD/conductor.py +++ b/PFERD/conductor.py @@ -17,6 +17,7 @@ class ProgressBar: def set_total(self, total: float) -> None: self._progress.update(self._taskid, total=total) + self._progress.start_task(self._taskid) class TerminalConductor: diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index fb08cc9..1677ff0 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -1,6 +1,8 @@ import asyncio import datetime +import random from pathlib import Path, PurePath +from typing import Optional from ..conductor import TerminalConductor from ..config import Config @@ -14,6 +16,24 @@ class LocalCrawlerSection(CrawlerSection): self.missing_value("path") return Path(value).expanduser() + def crawl_delay(self) -> Optional[float]: + value = self.s.getfloat("crawl_delay") + if value <= 0: + self.invalid_value("crawl_delay", value) + return value + + def download_delay(self) -> Optional[float]: + value = self.s.getfloat("download_delay") + if value <= 0: + self.invalid_value("download_delay", value) + return value + + def 
download_speed(self) -> Optional[int]: + value = self.s.getint("download_speed") + if value <= 0: + self.invalid_value("download_speed", value) + return value + class LocalCrawler(Crawler): def __init__( @@ -26,6 +46,14 @@ class LocalCrawler(Crawler): super().__init__(name, section, config, conductor) self._path = config.working_dir / section.path() + self._crawl_delay = section.crawl_delay() + self._download_delay = section.download_delay() + self._download_speed = section.download_speed() + + if self._download_speed: + self._block_size = self._download_speed // 10 + else: + self._block_size = 1024**2 # 1 MiB async def crawl(self) -> None: await self._crawl_path(self._path, PurePath()) @@ -41,28 +69,49 @@ class LocalCrawler(Crawler): async def _crawl_dir(self, path: Path, pure: PurePath) -> None: tasks = [] + async with self.crawl_bar(pure): + if self._crawl_delay: + await asyncio.sleep(random.uniform( + 0.5 * self._crawl_delay, + self._crawl_delay, + )) + for child in path.iterdir(): pure_child = pure / child.name tasks.append(self._crawl_path(child, pure_child)) + await asyncio.gather(*tasks) async def _crawl_file(self, path: Path, pure: PurePath) -> None: + stat = path.stat() + mtime = datetime.datetime.fromtimestamp(stat.st_mtime) + dl = await self.download(pure, mtime=mtime) + if not dl: + return + async with self.download_bar(path) as bar: - stat = path.stat() - mtime = datetime.datetime.fromtimestamp(stat.st_mtime) - dl = await self.download(pure, mtime=mtime) - if not dl: - return + if self._download_delay: + await asyncio.sleep(random.uniform( + 0.5 * self._download_delay, + self._download_delay, + )) bar.set_total(stat.st_size) async with dl as sink: with open(path, "rb") as f: while True: - data = f.read(1024**2) + data = f.read(self._block_size) if len(data) == 0: break + sink.file.write(data) bar.advance(len(data)) + + if self._download_speed: + delay = self._block_size / self._download_speed + delay = random.uniform(0.8 * delay, 1.2 * delay) + await 
asyncio.sleep(delay) + sink.done() From 296a169dd30e68a679624b2a53ef516281a51a0d Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 00:38:46 +0200 Subject: [PATCH 110/524] Make limiter logic more complex The limiter can now distinguish between crawl and download actions and has a fancy slot system and delay logic. --- CONFIG.md | 11 ++++++++ PFERD/config.py | 12 +++++++-- PFERD/crawler.py | 65 +++++++++++++++++++++++++++++++++--------------- PFERD/limiter.py | 65 ++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 126 insertions(+), 27 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 2cac906..a74eef3 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -64,6 +64,17 @@ crawlers: remote file is different. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) +- `max_concurrent_crawls`: The maximum number of concurrent crawl actions. What + constitutes a crawl action might vary from crawler to crawler, but it usually + means an HTTP request of a page to analyze. (Default: 1) +- `max_concurrent_downloads`: The maximum number of concurrent download actions. + What constitutes a download action might vary from crawler to crawler, but it + usually means an HTTP request for a single file. (Default: 1) +- `request_delay`: Time (in seconds) that the crawler should wait between + subsequent requests. Can be used to avoid unnecessary strain for the crawl + target. Crawl and download actions are handled separately, meaning that a + download action might immediately follow a crawl action even if this is set to + a nonzero value. (Default: 0) Some crawlers may also require credentials for authentication. To configure how the crawler obtains its credentials, the `auth` option is used. 
It is set to the diff --git a/PFERD/config.py b/PFERD/config.py index 56ea9af..0520f74 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -33,8 +33,16 @@ class Section: def error(self, key: str, desc: str) -> NoReturn: raise ConfigFormatException(self.s.name, key, desc) - def invalid_value(self, key: str, value: Any) -> NoReturn: - self.error(key, f"Invalid value: {value!r}") + def invalid_value( + self, + key: str, + value: Any, + reason: Optional[str], + ) -> NoReturn: + if reason is None: + self.error(key, f"Invalid value {value!r}") + else: + self.error(key, f"Invalid value {value!r}: {reason}") def missing_value(self, key: str) -> NoReturn: self.error(key, "Missing value") diff --git a/PFERD/crawler.py b/PFERD/crawler.py index ece62c1..f506294 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -139,6 +139,28 @@ class CrawlerSection(Section): def transform(self) -> str: return self.s.get("transform", "") + def max_concurrent_crawls(self) -> int: + value = self.s.getint("max_concurrent_crawls", fallback=1) + if value <= 0: + self.invalid_value("max_concurrent_crawls", value, + "Must be greater than 0") + return value + + def max_concurrent_downloads(self) -> int: + value = self.s.getint("max_concurrent_downloads", fallback=1) + + if value <= 0: + self.invalid_value("max_concurrent_downloads", value, + "Must be greater than 0") + return value + + def request_delay(self) -> float: + value = self.s.getfloat("request_delay", fallback=0.0) + if value < 0: + self.invalid_value("request_delay", value, + "Must be greater than or equal to 0") + return value + def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator: value = self.s.get("auth") if value is None: @@ -168,9 +190,14 @@ class Crawler(ABC): self.name = name self._conductor = conductor - self._limiter = Limiter() self.error_free = True + self._limiter = Limiter( + crawl_limit=section.max_concurrent_crawls(), + download_limit=section.max_concurrent_downloads(), + 
delay=section.request_delay(), + ) + try: self._transformer = Transformer(section.transform()) except RuleParseException as e: @@ -210,28 +237,26 @@ class Crawler(ABC): return self._conductor.exclusive_output() @asynccontextmanager - async def progress_bar( - self, - desc: str, - total: Optional[int] = None, - ) -> AsyncIterator[ProgressBar]: - async with self._limiter.limit(): - with self._conductor.progress_bar(desc, total=total) as bar: - yield bar - - def crawl_bar(self, path: PurePath) -> AsyncContextManager[ProgressBar]: - pathstr = escape(str(path)) - desc = f"[bold magenta]Crawling[/bold magenta] {pathstr}" - return self.progress_bar(desc) - - def download_bar( + async def crawl_bar( self, path: PurePath, total: Optional[int] = None, - ) -> AsyncContextManager[ProgressBar]: - pathstr = escape(str(path)) - desc = f"[bold green]Downloading[/bold green] {pathstr}" - return self.progress_bar(desc, total=total) + ) -> AsyncIterator[ProgressBar]: + desc = f"[bold bright_cyan]Crawling[/] {escape(str(path))}" + async with self._limiter.limit_crawl(): + with self._conductor.progress_bar(desc, total=total) as bar: + yield bar + + @asynccontextmanager + async def download_bar( + self, + path: PurePath, + total: Optional[int] = None, + ) -> AsyncIterator[ProgressBar]: + desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}" + async with self._limiter.limit_download(): + with self._conductor.progress_bar(desc, total=total) as bar: + yield bar async def download( self, diff --git a/PFERD/limiter.py b/PFERD/limiter.py index ae72fe6..6359221 100644 --- a/PFERD/limiter.py +++ b/PFERD/limiter.py @@ -1,13 +1,68 @@ import asyncio +import time from contextlib import asynccontextmanager -from typing import AsyncIterator +from dataclasses import dataclass +from typing import AsyncContextManager, AsyncIterator, Optional -class Limiter: - def __init__(self, limit: int = 10): - self._semaphore = asyncio.Semaphore(limit) +@dataclass +class Slot: + active: bool = False + 
last_left: Optional[float] = None + + +class SlotPool: + def __init__(self, limit: int, delay: float): + if limit <= 0: + raise ValueError("limit must be greater than 0") + + self._slots = [Slot() for _ in range(limit)] + self._delay = delay + + self._free = asyncio.Condition() + + def _acquire_slot(self) -> Optional[Slot]: + for slot in self._slots: + if not slot.active: + slot.active = True + return slot + + return None + + def _release_slot(self, slot: Slot) -> None: + slot.last_left = time.time() + slot.active = False @asynccontextmanager async def limit(self) -> AsyncIterator[None]: - async with self._semaphore: + slot: Slot + async with self._free: + while True: + if found_slot := self._acquire_slot(): + slot = found_slot + break + await self._free.wait() + + if slot.last_left is not None: + delay = slot.last_left + self._delay - time.time() + if delay > 0: + await asyncio.sleep(delay) + + try: yield + finally: + async with self._free: + self._release_slot(slot) + self._free.notify() + + +class Limiter: + def __init__(self, crawl_limit: int, download_limit: int, delay: float): + self._crawl_pool = SlotPool(crawl_limit, delay) + self._download_pool = SlotPool(download_limit, delay) + + def limit_crawl(self) -> AsyncContextManager[None]: + return self._crawl_pool.limit() + + def limit_download(self) -> AsyncContextManager[None]: + return self._crawl_pool.limit() From ed2e19a150004fa61544528195dfc4acf9b70ec2 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 00:39:55 +0200 Subject: [PATCH 111/524] Add reasons for invalid values --- PFERD/crawler.py | 16 +++++++++++++--- PFERD/crawlers/local.py | 9 ++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index f506294..48dfcb4 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -122,7 +122,12 @@ class CrawlerSection(Section): return Redownload.ALWAYS elif value == "always-smart": return Redownload.ALWAYS_SMART - self.invalid_value("redownload", 
value) + + self.invalid_value( + "redownload", + value, + "Must be 'never', 'never-smart', 'always' or 'always-smart'" + ) def on_conflict(self) -> OnConflict: value = self.s.get("on_conflict", "prompt") @@ -134,7 +139,12 @@ class CrawlerSection(Section): return OnConflict.REMOTE_FIRST elif value == "no-delete": return OnConflict.NO_DELETE - self.invalid_value("on_conflict", value) + + self.invalid_value( + "on_conflict", + value, + "Must be 'prompt', 'local-first', 'remote-first' or 'no-delete'", + ) def transform(self) -> str: return self.s.get("transform", "") @@ -167,7 +177,7 @@ class CrawlerSection(Section): self.missing_value("auth") auth = authenticators.get(f"auth:{value}") if auth is None: - self.invalid_value("auth", value) + self.invalid_value("auth", value, "No such auth section exists") return auth diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 1677ff0..07e6133 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -19,19 +19,22 @@ class LocalCrawlerSection(CrawlerSection): def crawl_delay(self) -> Optional[float]: value = self.s.getfloat("crawl_delay") if value <= 0: - self.invalid_value("crawl_delay", value) + self.invalid_value("crawl_delay", value, + "Must be greater than 0") return value def download_delay(self) -> Optional[float]: value = self.s.getfloat("download_delay") if value <= 0: - self.invalid_value("download_delay", value) + self.invalid_value("download_delay", value, + "Must be greater than 0") return value def download_speed(self) -> Optional[int]: value = self.s.getint("download_speed") if value <= 0: - self.invalid_value("download_speed", value) + self.invalid_value("download_speed", value, + "Must be greater than 0") return value From b0f9e1e8b4fb22f7bbe0b5f1839bd405651d9eb1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 15 May 2021 11:20:20 +0200 Subject: [PATCH 112/524] Add vscode directory to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore 
b/.gitignore index c888722..2928b54 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /.venv/ /PFERD.egg-info/ __pycache__/ +/.vscode/ From acd674f0a076fba8bfaf64b90bfc3000d3f5cb73 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 13:21:38 +0200 Subject: [PATCH 113/524] Change limiter logic Now download tasks are a subset of all tasks. --- CONFIG.md | 19 +++++----- PFERD/crawler.py | 27 ++++++++------ PFERD/limiter.py | 93 +++++++++++++++++++++++++++++++----------------- 3 files changed, 85 insertions(+), 54 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index a74eef3..2338d8f 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -64,17 +64,14 @@ crawlers: remote file is different. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) -- `max_concurrent_crawls`: The maximum number of concurrent crawl actions. What - constitutes a crawl action might vary from crawler to crawler, but it usually - means an HTTP request of a page to analyze. (Default: 1) -- `max_concurrent_downloads`: The maximum number of concurrent download actions. - What constitutes a download action might vary from crawler to crawler, but it - usually means an HTTP request for a single file. (Default: 1) -- `request_delay`: Time (in seconds) that the crawler should wait between - subsequent requests. Can be used to avoid unnecessary strain for the crawl - target. Crawl and download actions are handled separately, meaning that a - download action might immediately follow a crawl action even if this is set to - a nonzero value. (Default: 0) +- `max_concurrent_tasks`: The maximum number of concurrent tasks (such as + crawling or downloading). (Default: 1) +- `max_concurrent_downloads`: How many of those tasks can be download tasks at + the same time. Must not be greater than `max_concurrent_tasks`. When not set, + this is the same as `max_concurrent_tasks`. 
(Optional) +- `delay_between_tasks`: Time (in seconds) that the crawler should wait between + subsequent tasks. Can be used as a sort of rate limit to avoid unnecessary + load for the crawl target. (Default: 0.0) Some crawlers may also require credentials for authentication. To configure how the crawler obtains its credentials, the `auth` option is used. It is set to the diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 48dfcb4..9ec5991 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -149,26 +149,31 @@ class CrawlerSection(Section): def transform(self) -> str: return self.s.get("transform", "") - def max_concurrent_crawls(self) -> int: - value = self.s.getint("max_concurrent_crawls", fallback=1) + def max_concurrent_tasks(self) -> int: + value = self.s.getint("max_concurrent_tasks", fallback=1) if value <= 0: - self.invalid_value("max_concurrent_crawls", value, + self.invalid_value("max_concurrent_tasks", value, "Must be greater than 0") return value def max_concurrent_downloads(self) -> int: - value = self.s.getint("max_concurrent_downloads", fallback=1) - + tasks = self.max_concurrent_tasks() + value = self.s.getint("max_concurrent_downloads", fallback=None) + if value is None: + return tasks if value <= 0: self.invalid_value("max_concurrent_downloads", value, "Must be greater than 0") + if value > tasks: + self.invalid_value("max_concurrent_downloads", value, + "Must not be greater than max_concurrent_tasks") return value - def request_delay(self) -> float: - value = self.s.getfloat("request_delay", fallback=0.0) + def delay_between_tasks(self) -> float: + value = self.s.getfloat("delay_between_tasks", fallback=0.0) if value < 0: - self.invalid_value("request_delay", value, - "Must be greater than or equal to 0") + self.invalid_value("delay_between_tasks", value, + "Must not be negative") return value def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator: @@ -203,9 +208,9 @@ class Crawler(ABC): self.error_free = True 
self._limiter = Limiter( - crawl_limit=section.max_concurrent_crawls(), + task_limit=section.max_concurrent_tasks(), download_limit=section.max_concurrent_downloads(), - delay=section.request_delay(), + task_delay=section.delay_between_tasks(), ) try: diff --git a/PFERD/limiter.py b/PFERD/limiter.py index 6359221..3122a7a 100644 --- a/PFERD/limiter.py +++ b/PFERD/limiter.py @@ -2,7 +2,7 @@ import asyncio import time from contextlib import asynccontextmanager from dataclasses import dataclass -from typing import AsyncContextManager, AsyncIterator, Optional +from typing import AsyncIterator, Optional @dataclass @@ -11,15 +11,27 @@ class Slot: last_left: Optional[float] = None -class SlotPool: - def __init__(self, limit: int, delay: float): - if limit <= 0: - raise ValueError("limit must be greater than 0") +class Limiter: + def __init__( + self, + task_limit: int, + download_limit: int, + task_delay: float + ): + if task_limit <= 0: + raise ValueError("task limit must be at least 1") + if download_limit <= 0: + raise ValueError("download limit must be at least 1") + if download_limit > task_limit: + raise ValueError("download limit can't be greater than task limit") + if task_delay < 0: + raise ValueError("Task delay must not be negative") - self._slots = [Slot() for _ in range(limit)] - self._delay = delay + self._slots = [Slot() for _ in range(task_limit)] + self._downloads = download_limit + self._delay = task_delay - self._free = asyncio.Condition() + self._condition = asyncio.Condition() def _acquire_slot(self) -> Optional[Slot]: for slot in self._slots: @@ -29,40 +41,57 @@ class SlotPool: return None - def _release_slot(self, slot: Slot) -> None: - slot.last_left = time.time() - slot.active = False - - @asynccontextmanager - async def limit(self) -> AsyncIterator[None]: - slot: Slot - async with self._free: - while True: - if found_slot := self._acquire_slot(): - slot = found_slot - break - await self._free.wait() - + async def _wait_for_slot_delay(self, slot: 
Slot) -> None: if slot.last_left is not None: delay = slot.last_left + self._delay - time.time() if delay > 0: await asyncio.sleep(delay) + def _release_slot(self, slot: Slot) -> None: + slot.last_left = time.time() + slot.active = False + + @asynccontextmanager + async def limit_crawl(self) -> AsyncIterator[None]: + slot: Slot + async with self._condition: + while True: + if found_slot := self._acquire_slot(): + slot = found_slot + break + await self._condition.wait() + + await self._wait_for_slot_delay(slot) + try: yield finally: - async with self._free: + async with self._condition: self._release_slot(slot) - self._free.notify() + self._condition.notify_all() + @asynccontextmanager + async def limit_download(self) -> AsyncIterator[None]: + slot: Slot + async with self._condition: + while True: + if self._downloads <= 0: + await self._condition.wait() + continue -class Limiter: - def __init__(self, crawl_limit: int, download_limit: int, delay: float): - self._crawl_pool = SlotPool(crawl_limit, delay) - self._download_pool = SlotPool(download_limit, delay) + if found_slot := self._acquire_slot(): + slot = found_slot + self._downloads -= 1 + break - def limit_crawl(self) -> AsyncContextManager[None]: - return self._crawl_pool.limit() + await self._condition.wait() - def limit_download(self) -> AsyncContextManager[None]: - return self._crawl_pool.limit() + await self._wait_for_slot_delay(slot) + + try: + yield + finally: + async with self._condition: + self._release_slot(slot) + self._downloads += 1 + self._condition.notify_all() From 302b8c0c3466a51c29f919d519edf2b0ce8f40e8 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 13:32:13 +0200 Subject: [PATCH 114/524] Fix errors loading local crawler config Apparently getint and getfloat may return a None even though this is not mentioned in their type annotations. 
--- CONFIG.md | 4 ++-- PFERD/crawlers/local.py | 36 +++++++++++++++++------------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 2338d8f..dd38c11 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -115,9 +115,9 @@ crawler simulate a slower, network-based crawler. - `path`: Path to the local directory to crawl. (Required) - `crawl_delay`: Maximum artificial delay (in seconds) to simulate for crawl - requests. (Optional) + requests. (Default: 0.0) - `download_delay`: Maximum artificial delay (in seconds) to simulate for - download requests. (Optional) + download requests. (Default: 0.0) - `download_speed`: Download speed (in bytes per second) to simulate. (Optional) ## Authenticator types diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 07e6133..99bc700 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -16,23 +16,23 @@ class LocalCrawlerSection(CrawlerSection): self.missing_value("path") return Path(value).expanduser() - def crawl_delay(self) -> Optional[float]: - value = self.s.getfloat("crawl_delay") - if value <= 0: + def crawl_delay(self) -> float: + value = self.s.getfloat("crawl_delay", fallback=0.0) + if value < 0: self.invalid_value("crawl_delay", value, - "Must be greater than 0") + "Must not be negative") return value - def download_delay(self) -> Optional[float]: - value = self.s.getfloat("download_delay") - if value <= 0: + def download_delay(self) -> float: + value = self.s.getfloat("download_delay", fallback=0.0) + if value < 0: self.invalid_value("download_delay", value, - "Must be greater than 0") + "Must not be negative") return value def download_speed(self) -> Optional[int]: value = self.s.getint("download_speed") - if value <= 0: + if value is not None and value <= 0: self.invalid_value("download_speed", value, "Must be greater than 0") return value @@ -74,11 +74,10 @@ class LocalCrawler(Crawler): tasks = [] async with self.crawl_bar(pure): - if self._crawl_delay: - 
await asyncio.sleep(random.uniform( - 0.5 * self._crawl_delay, - self._crawl_delay, - )) + await asyncio.sleep(random.uniform( + 0.5 * self._crawl_delay, + self._crawl_delay, + )) for child in path.iterdir(): pure_child = pure / child.name @@ -94,11 +93,10 @@ class LocalCrawler(Crawler): return async with self.download_bar(path) as bar: - if self._download_delay: - await asyncio.sleep(random.uniform( - 0.5 * self._download_delay, - self._download_delay, - )) + await asyncio.sleep(random.uniform( + 0.5 * self._download_delay, + self._download_delay, + )) bar.set_total(stat.st_size) From b0f731bf84dfd60cc78f08dfbd6ed0992faba3c8 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 14:03:15 +0200 Subject: [PATCH 115/524] Make crawlers use transformers --- PFERD/crawler.py | 9 ++++++++- PFERD/crawlers/local.py | 3 ++- PFERD/transformer.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 9ec5991..f8cf091 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -273,6 +273,9 @@ class Crawler(ABC): with self._conductor.progress_bar(desc, total=total) as bar: yield bar + def should_crawl(self, path: PurePath) -> bool: + return self._transformer.transform(path) is not None + async def download( self, path: PurePath, @@ -280,8 +283,12 @@ class Crawler(ABC): redownload: Optional[Redownload] = None, on_conflict: Optional[OnConflict] = None, ) -> Optional[AsyncContextManager[FileSink]]: + transformed_path = self._transformer.transform(path) + if transformed_path is None: + return None + return await self._output_dir.download( - path, mtime, redownload, on_conflict) + transformed_path, mtime, redownload, on_conflict) async def cleanup(self) -> None: await self._output_dir.cleanup() diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 99bc700..360a9a9 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -81,7 +81,8 @@ class LocalCrawler(Crawler): for child in path.iterdir(): 
pure_child = pure / child.name - tasks.append(self._crawl_path(child, pure_child)) + if self.should_crawl(child): + tasks.append(self._crawl_path(child, pure_child)) await asyncio.gather(*tasks) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 84332df..fb47c60 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -292,4 +292,4 @@ class Transformer: else: continue - return None + return path From f897d7c2e15f99780fc81945d107b88c1dc668e7 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 15:06:45 +0200 Subject: [PATCH 116/524] Add name variants for all arrows --- CONFIG.md | 57 ++++++++++++++++++++++++++++++++++++++++++-- PFERD/transformer.py | 44 +++++++++++++++++++++++++++------- 2 files changed, 91 insertions(+), 10 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index dd38c11..cccc751 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -143,7 +143,8 @@ Each line has the format `SOURCE ARROW TARGET` where `TARGET` is optional. literal delimited by `"` or `'` (e. g. `"foo\" bar/baz"`). Python's string escape syntax is supported. Trailing slashes are ignored. `TARGET` can be formatted like `SOURCE`, but it can also be a single exclamation mark without -quotes (`!`). `ARROW` is one of `-->`, `-exact->` and `-re->`. +quotes (`!`). `ARROW` is one of `-->`, `-exact->`, `-name->`, `-re->` and +`-name-re->` If a rule's target is `!`, this means that when the rule matches on a path, the corresponding file or directory is ignored. If a rule's target is missing, the @@ -157,6 +158,14 @@ that part of the path is replaced with `TARGET`. This means that the rule into `baz/xyz`. The rule `foo --> !` would ignore a directory named `foo` as well as all its contents. +### The `-name->` arrow + +The `-name->` arrow works similar to the `-->` arrow, but pretends it is in the +same directory as the file or directory it is applied to. For example, the rule +`bar -name-> baz` would convert `foo/bar` into `foo/baz` and `foo/bar/xyz` into +`foo/baz/xyz`. 
The rule `foo --> !` would ignore all files and directories named +`foo` as well as their contents. + ### The `-exact->` arrow The `-exact->` arrow requires the path to match `SOURCE` exactly. This means @@ -165,6 +174,14 @@ but `foo/bar/xyz` would be unaffected. Also, `foo -exact-> !` would only ignore `foo`, but not its contents (if it has any). The examples below show why this is useful. +### The `-name-exact->` arrow + +The `-name-exact->` arrow works similar to the `-exact->` arrow, but pretends it +is in the same directory as the file or directory it is applied to. For example, +the rule `bar -name-exact-> baz` would convert `foo/bar` into `foo/baz` but +`foo/bar/xyz` would be unaffected. The rule `foo --> !` would ignore only ignore +files and directories named `foo`, but not their contents. + ### The `-re->` arrow The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression @@ -186,10 +203,15 @@ example `{g2.lower()}` or `{g3.replace(' ', '_')}`. [3]: "Format String Syntax" +### The `-name-re->` arrow + +The `-name-re>` arrow works similar to the `-re->` arrow, but pretends it is in +the same directory as the file or directory it is applied to. + ### Example: Tutorials You have an ILIAS course with lots of tutorials, but are only interested in a -single one? +single one. ``` tutorials/ @@ -236,3 +258,34 @@ To do this, you can use the most powerful of arrows: The regex arrow. ``` Note the escaped backslashes on the `SOURCE` side. + +### Example: Crawl a python project + +You are crawling a python project and want to ignore all hidden files (files +whose name starts with a `.`), all `__pycache__` directories and all markdown +files (for some weird reason). + +``` +.gitignore +.mypy_cache/ +.venv/ +CONFIG.md +PFERD/ + |- __init__.py + |- __main__.py + |- __pycache__/ + |- authenticator.py + |- config.py + ... +README.md +... +``` + +For this task, the name arrows can be used. 
They are variants of the normal +arrows that only look at the file name instead of the entire path. + +``` +\..* -name-re-> ! +__pycache__ -name-> ! +.*\.md -name-re-> ! +``` diff --git a/PFERD/transformer.py b/PFERD/transformer.py index fb47c60..1b80433 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -77,6 +77,19 @@ class ExactRule(Rule): return False +class NameRule(Rule): + def __init__(self, subrule: Rule): + self._subrule = subrule + + def transform(self, path: PurePath) -> Union[PurePath, bool]: + name = PurePath(*path.parts[-1:]) + result = self._subrule.transform(name) + if isinstance(result, PurePath): + return path.parent / result + else: + return result + + class ReRule(Rule): def __init__(self, left: str, right: Union[str, bool]): self._left = left @@ -220,16 +233,25 @@ def parse_arrow(line: Line) -> str: name = [] while True: - if c := line.get(): - if c == "-": - break - else: - name.append(c) - line.advance() - else: + c = line.get() + if not c: raise RuleParseException(line, "Expected rest of arrow") + elif c == "-": + line.advance() + c = line.get() + if not c: + raise RuleParseException(line, "Expected rest of arrow") + elif c == ">": + line.advance() + break # End of arrow + else: + name.append("-") + name.append(c) + else: + name.append(c) + + line.advance() - line.expect("->") return "".join(name) @@ -261,10 +283,16 @@ def parse_rule(line: Line) -> Rule: # Dispatch if arrowname == "": return NormalRule(PurePath(left), rightpath) + elif arrowname == "name": + return NameRule(NormalRule(PurePath(left), rightpath)) elif arrowname == "exact": return ExactRule(PurePath(left), rightpath) + elif arrowname == "name-exact": + return NameRule(ExactRule(PurePath(left), rightpath)) elif arrowname == "re": return ReRule(left, right) + elif arrowname == "name-re": + return NameRule(ReRule(left, right)) else: line.index = arrowindex + 1 # For nicer error message raise RuleParseException(line, "Invalid arrow name") From 
a6fdf05ee91902806dcfa51ad9cec6b6e843947b Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 15:13:34 +0200 Subject: [PATCH 117/524] Allow variable whitespace in arrow rules --- CONFIG.md | 6 +++--- PFERD/transformer.py | 10 ++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index cccc751..df3e8f2 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -285,7 +285,7 @@ For this task, the name arrows can be used. They are variants of the normal arrows that only look at the file name instead of the entire path. ``` -\..* -name-re-> ! -__pycache__ -name-> ! -.*\.md -name-re-> ! +\..* -name-re-> ! +__pycache__ -name-> ! +.*\.md -name-re-> ! ``` diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 1b80433..135baf2 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -255,6 +255,12 @@ def parse_arrow(line: Line) -> str: return "".join(name) +def parse_whitespace(line: Line) -> None: + line.expect(" ") + while line.get() == " ": + line.advance() + + def parse_rule(line: Line) -> Rule: # Parse left side leftindex = line.index @@ -264,13 +270,13 @@ def parse_rule(line: Line) -> Rule: raise RuleParseException(line, "Left side can't be '!'") # Parse arrow - line.expect(" ") + parse_whitespace(line) arrowindex = line.index arrowname = parse_arrow(line) # Parse right side if line.get(): - line.expect(" ") + parse_whitespace(line) right = parse_string(line) else: right = False From 595de88d964332782133c51846d468b0412b45e4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 15:18:16 +0200 Subject: [PATCH 118/524] Fix authenticator and crawler names Now, the "auth:" and "crawl:" parts are considered part of the name. This fixes crawlers not being able to find their authenticators. 
--- PFERD/config.py | 14 ++++++-------- PFERD/crawler.py | 2 +- PFERD/pferd.py | 6 +++--- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/PFERD/config.py b/PFERD/config.py index 0520f74..66b882e 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -139,19 +139,17 @@ class Config: def crawler_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] - for section_name, section_proxy in self._parser.items(): - if section_name.startswith("crawler:"): - crawler_name = section_name[8:] - result.append((crawler_name, section_proxy)) + for name, proxy in self._parser.items(): + if name.startswith("crawler:"): + result.append((name, proxy)) return result def authenticator_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] - for section_name, section_proxy in self._parser.items(): - if section_name.startswith("auth:"): - crawler_name = section_name[5:] - result.append((crawler_name, section_proxy)) + for name, proxy in self._parser.items(): + if name.startswith("auth:"): + result.append((name, proxy)) return result diff --git a/PFERD/crawler.py b/PFERD/crawler.py index f8cf091..f49eba8 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -180,7 +180,7 @@ class CrawlerSection(Section): value = self.s.get("auth") if value is None: self.missing_value("auth") - auth = authenticators.get(f"auth:{value}") + auth = authenticators.get(value) if auth is None: self.invalid_value("auth", value, "No such auth section exists") return auth diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 4500ba9..9154a80 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -25,7 +25,7 @@ class Pferd: def _load_authenticators(self) -> None: abort = False for name, section in self._config.authenticator_sections(): - print(f"[bold bright_cyan]Loading[/] auth:{escape(name)}") + print(f"[bold bright_cyan]Loading[/] {escape(name)}") authenticator_type = section.get("type") authenticator_constructor = AUTHENTICATORS.get(authenticator_type) if authenticator_constructor is 
None: @@ -48,7 +48,7 @@ class Pferd: def _load_crawlers(self) -> None: abort = False for name, section in self._config.crawler_sections(): - print(f"[bold bright_cyan]Loading[/] crawler:{escape(name)}") + print(f"[bold bright_cyan]Loading[/] {escape(name)}") crawler_type = section.get("type") crawler_constructor = CRAWLERS.get(crawler_type) if crawler_constructor is None: @@ -79,6 +79,6 @@ class Pferd: for name, crawler in self._crawlers.items(): print() - print(f"[bold bright_cyan]Running[/] crawler:{escape(name)}") + print(f"[bold bright_cyan]Running[/] {escape(name)}") await crawler.run() From b2a2b5999bd38abfebfcc8ee3d48dcd90ccb59b6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 15 May 2021 15:18:51 +0200 Subject: [PATCH 119/524] Implement ILIAS auth and crawl home page This commit introduces the necessary machinery to authenticate with ILIAS and crawl the home page. It can't do much yet and just silently fetches the homepage. --- PFERD/crawlers/__init__.py | 3 + PFERD/crawlers/ilias.py | 209 +++++++++++++++++++++++++++++++++++++ PFERD/utils.py | 8 ++ setup.cfg | 1 + 4 files changed, 221 insertions(+) create mode 100644 PFERD/crawlers/ilias.py diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index b2e5af5..0ae2ca3 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -5,6 +5,7 @@ from ..authenticator import Authenticator from ..conductor import TerminalConductor from ..config import Config from ..crawler import Crawler +from .ilias import IliasCrawler, IliasCrawlerSection from .local import LocalCrawler, LocalCrawlerSection CrawlerConstructor = Callable[[ @@ -18,4 +19,6 @@ CrawlerConstructor = Callable[[ CRAWLERS: Dict[str, CrawlerConstructor] = { "local": lambda n, s, c, t, a: LocalCrawler(n, LocalCrawlerSection(s), c, t), + "ilias": lambda n, s, c, t, a: + IliasCrawler(n, IliasCrawlerSection(s), c, t, a), } diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py new file mode 100644 index 
0000000..84a7c15 --- /dev/null +++ b/PFERD/crawlers/ilias.py @@ -0,0 +1,209 @@ +from configparser import SectionProxy +from pathlib import PurePath +from typing import Any, Dict, Optional + +import aiohttp +from bs4 import BeautifulSoup +from PFERD.utils import soupify + +from ..authenticators import Authenticator +from ..conductor import TerminalConductor +from ..config import Config +from ..crawler import (Crawler, CrawlerSection, HttpCrawler, anoncritical, + arepeat) + + +class IliasCrawlerSection(CrawlerSection): + + def __init__(self, section: SectionProxy): + super().__init__(section) + + if not self.course_id() and not self.element_url(): + self.missing_value("course_id or element_url") + + def course_id(self) -> Optional[str]: + return self.s.get("course_id") + + def element_url(self) -> Optional[str]: + return self.s.get("element_url") + + def base_url(self) -> str: + return self.s.get("ilias_url", "https://ilias.studium.kit.edu/") + + def tfa_auth(self, authenticators: Dict[str, Authenticator]) -> Optional[Authenticator]: + value = self.s.get("tfa_auth") + if not value: + return None + + auth = authenticators.get(f"auth:{value}") + if auth is None: + self.invalid_value("auth", value, "No such auth section exists") + return auth + + +class IliasCrawler(HttpCrawler): + def __init__( + self, + name: str, + section: IliasCrawlerSection, + config: Config, + conductor: TerminalConductor, + authenticators: Dict[str, Authenticator] + ): + super().__init__(name, section, config, conductor) + + self._shibboleth_login = KitShibbolethLogin( + section.auth(authenticators), + section.tfa_auth(authenticators) + ) + self._base_url = section.base_url() + + self._course_id = section.course_id() + self._element_url = section.element_url() + + async def crawl(self) -> None: + async with self.crawl_bar(PurePath("/")) as bar: + soup = await self._get_page(self._base_url) + self.print("[green]Gotcha![/]") + + async def _get_page(self, url: str, retries_left: int = 3) -> 
BeautifulSoup: + if retries_left < 0: + # TODO: Proper exception + raise RuntimeError("Get page failed too often") + async with self.session.get(url) as request: + soup = soupify(await request.read()) + if self._is_logged_in(soup): + return soup + + await self._shibboleth_login.login(self.session) + + return await self._get_page(url, retries_left - 1) + + @staticmethod + def _is_logged_in(soup: BeautifulSoup) -> bool: + # Normal ILIAS pages + userlog = soup.find("li", {"id": "userlog"}) + if userlog is not None: + return True + # Video listing embeds do not have complete ILIAS html. Try to match them by + # their video listing table + video_table = soup.find( + recursive=True, + name="table", + attrs={"id": lambda x: x is not None and x.startswith("tbl_xoct")} + ) + if video_table is not None: + return True + # The individual video player wrapper page has nothing of the above. + # Match it by its playerContainer. + if soup.select_one("#playerContainer") is not None: + return True + return False + + +class KitShibbolethLogin: + """ + Login via KIT's shibboleth system. + """ + + def __init__(self, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator]) -> None: + self._auth = authenticator + self._tfa_auth = tfa_authenticator + + async def login(self, sess: aiohttp.ClientSession) -> None: + """ + Performs the ILIAS Shibboleth authentication dance and saves the login + cookies it receieves. + + This function should only be called whenever it is detected that you're + not logged in. The cookies obtained should be good for a few minutes, + maybe even an hour or two. 
+ """ + + # Equivalent: Click on "Mit KIT-Account anmelden" button in + # https://ilias.studium.kit.edu/login.php + url = "https://ilias.studium.kit.edu/Shibboleth.sso/Login" + data = { + "sendLogin": "1", + "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", + "target": "/shib_login.php", + "home_organization_selection": "Mit KIT-Account anmelden", + } + soup: BeautifulSoup = await _post(sess, url, data) + + # Attempt to login using credentials, if necessary + while not self._login_successful(soup): + # Searching the form here so that this fails before asking for + # credentials rather than after asking. + form = soup.find("form", {"class": "full content", "method": "post"}) + action = form["action"] + + csrf_token = form.find("input", {"name": "csrf_token"})["value"] + + # Equivalent: Enter credentials in + # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO + url = "https://idp.scc.kit.edu" + action + username, password = await self._auth.credentials() + data = { + "_eventId_proceed": "", + "j_username": username, + "j_password": password, + "csrf_token": csrf_token + } + soup = await _post(sess, url, data) + + if self._tfa_required(soup): + soup = await self._authenticate_tfa(sess, soup) + + if not self._login_successful(soup): + self._auth.invalid_credentials() + + # Equivalent: Being redirected via JS automatically + # (or clicking "Continue" if you have JS disabled) + relay_state = soup.find("input", {"name": "RelayState"}) + saml_response = soup.find("input", {"name": "SAMLResponse"}) + url = "https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST" + data = { # using the info obtained in the while loop above + "RelayState": relay_state["value"], + "SAMLResponse": saml_response["value"], + } + await sess.post(url, data=data) + + async def _authenticate_tfa( + self, + session: aiohttp.ClientSession, + soup: BeautifulSoup + ) -> BeautifulSoup: + if not self._tfa_auth: + raise RuntimeError("No 'tfa_auth' present but you use two-factor 
authentication!") + + _, tfa_token = await self._tfa_auth.credentials() + + # Searching the form here so that this fails before asking for + # credentials rather than after asking. + form = soup.find("form", {"method": "post"}) + action = form["action"] + + # Equivalent: Enter token in + # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO + url = "https://idp.scc.kit.edu" + action + data = { + "_eventId_proceed": "", + "j_tokenNumber": tfa_token + } + return _post(session, url, data) + + @staticmethod + def _login_successful(soup: BeautifulSoup) -> bool: + relay_state = soup.find("input", {"name": "RelayState"}) + saml_response = soup.find("input", {"name": "SAMLResponse"}) + return relay_state is not None and saml_response is not None + + @staticmethod + def _tfa_required(soup: BeautifulSoup) -> bool: + return soup.find(id="j_tokenNumber") is not None + + +async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: + async with session.post(url, data=data) as response: + return soupify(await response.read()) diff --git a/PFERD/utils.py b/PFERD/utils.py index 08017aa..d7c61ec 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -4,6 +4,8 @@ import functools import getpass from typing import Any, Callable, Optional, TypeVar +import bs4 + T = TypeVar("T") @@ -23,6 +25,12 @@ async def ainput(prompt: str) -> str: async def agetpass(prompt: str) -> str: return await to_thread(lambda: getpass.getpass(prompt)) +def soupify(data: bytes) -> bs4.BeautifulSoup: + """ + Parses HTML to a beautifulsoup object. 
+ """ + + return bs4.BeautifulSoup(data, "html.parser") async def prompt_yes_no(query: str, default: Optional[bool]) -> bool: """ diff --git a/setup.cfg b/setup.cfg index f2806e2..18ff558 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,6 +9,7 @@ install_requires = aiohttp>=3.7.4.post0 beautifulsoup4>=4.9.3 rich>=10.1.0 + beautifulsoup4>=4.9.3 [options.entry_points] console_scripts = From 868f4869225a4f4b5cd75a7483c8f8599f3a46f4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 17:12:25 +0200 Subject: [PATCH 120/524] Rename local crawler path to target --- CONFIG.md | 2 +- PFERD/crawlers/local.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index df3e8f2..22078ae 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -113,7 +113,7 @@ This crawler crawls a local directory. It is really simple and mostly useful for testing different setups. The various delay options are meant to make the crawler simulate a slower, network-based crawler. -- `path`: Path to the local directory to crawl. (Required) +- `target`: Path to the local directory to crawl. (Required) - `crawl_delay`: Maximum artificial delay (in seconds) to simulate for crawl requests. 
(Default: 0.0) - `download_delay`: Maximum artificial delay (in seconds) to simulate for diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 360a9a9..2dde0d4 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -10,10 +10,10 @@ from ..crawler import Crawler, CrawlerSection, anoncritical class LocalCrawlerSection(CrawlerSection): - def path(self) -> Path: - value = self.s.get("path") + def target(self) -> Path: + value = self.s.get("target") if value is None: - self.missing_value("path") + self.missing_value("target") return Path(value).expanduser() def crawl_delay(self) -> float: @@ -48,7 +48,7 @@ class LocalCrawler(Crawler): ): super().__init__(name, section, config, conductor) - self._path = config.working_dir / section.path() + self._target = config.working_dir / section.target() self._crawl_delay = section.crawl_delay() self._download_delay = section.download_delay() self._download_speed = section.download_speed() @@ -59,7 +59,7 @@ class LocalCrawler(Crawler): self._block_size = 1024**2 # 1 MiB async def crawl(self) -> None: - await self._crawl_path(self._path, PurePath()) + await self._crawl_path(self._target, PurePath()) if self.error_free: await self.cleanup() From b70b62cef542b282c69071b5cf963ed91ead2b65 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 17:23:33 +0200 Subject: [PATCH 121/524] Make crawler sections start with "crawl:" Also, use only the part of the section name after the "crawl:" as the crawler's output directory. Now, the implementation matches the documentation again --- CONFIG.md | 2 +- PFERD/config.py | 2 +- PFERD/crawler.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 22078ae..11c4282 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -40,7 +40,7 @@ crawlers: - `type`: The types are specified in [this section](#crawler-types). - `output_dir`: The directory the crawler synchronizes files to. 
A crawler will - never place any files outside of this directory. (Default: crawler's name) + never place any files outside of this directory. (Default: the crawler's name) - `redownload`: When to download again a file that is already present locally. (Default: `never-smart`) - `never`: If a file is present locally, it is not downloaded again. diff --git a/PFERD/config.py b/PFERD/config.py index 66b882e..7a7e832 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -140,7 +140,7 @@ class Config: def crawler_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] for name, proxy in self._parser.items(): - if name.startswith("crawler:"): + if name.startswith("crawl:"): result.append((name, proxy)) return result diff --git a/PFERD/crawler.py b/PFERD/crawler.py index f49eba8..4148614 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -110,6 +110,9 @@ def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: class CrawlerSection(Section): def output_dir(self, name: str) -> Path: + # TODO Use removeprefix() after switching to 3.9 + if name.startswith("crawl:"): + name = name[len("crawl:"):] return Path(self.s.get("output_dir", name)).expanduser() def redownload(self) -> Redownload: From d63494908dcbea7146ab4b62157878d15c15aedb Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 17:37:05 +0200 Subject: [PATCH 122/524] Properly invalidate exceptions The simple authenticator now properly invalidates its credentials. Also, the invalidation functions have been given better names and documentation. 
--- PFERD/authenticator.py | 30 +++++++++++++++++++++++++++--- PFERD/authenticators/simple.py | 25 ++++++++++++++++++++++--- PFERD/crawlers/ilias.py | 2 +- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/PFERD/authenticator.py b/PFERD/authenticator.py index 42d8bb9..b2f6164 100644 --- a/PFERD/authenticator.py +++ b/PFERD/authenticator.py @@ -42,11 +42,35 @@ class Authenticator(ABC): async def credentials(self) -> Tuple[str, str]: pass - def invalid_credentials(self) -> None: + def invalidate_credentials(self) -> None: + """ + Tell the authenticator that some or all of its credentials are invalid. + + Authenticators should overwrite this function if they have a way to + deal with this issue that is likely to result in valid credentials + (e. g. prompting the user). + """ + raise AuthException("Invalid credentials") - def invalid_username(self) -> None: + def invalidate_username(self) -> None: + """ + Tell the authenticator that specifically its username is invalid. + + Authenticators should overwrite this function if they have a way to + deal with this issue that is likely to result in valid credentials + (e. g. prompting the user). + """ + raise AuthException("Invalid username") - def invalid_password(self) -> None: + def invalidate_password(self) -> None: + """ + Tell the authenticator that specifically its password is invalid. + + Authenticators should overwrite this function if they have a way to + deal with this issue that is likely to result in valid credentials + (e. g. prompting the user). 
+ """ + raise AuthException("Invalid password") diff --git a/PFERD/authenticators/simple.py b/PFERD/authenticators/simple.py index 3a57faf..6ce6265 100644 --- a/PFERD/authenticators/simple.py +++ b/PFERD/authenticators/simple.py @@ -1,6 +1,6 @@ from typing import Optional, Tuple -from ..authenticator import Authenticator, AuthSection +from ..authenticator import Authenticator, AuthException, AuthSection from ..conductor import TerminalConductor from ..config import Config from ..utils import agetpass, ainput @@ -42,7 +42,26 @@ class SimpleAuthenticator(Authenticator): if self.password is None: self.password = await agetpass("Password: ") - else: - print("Password: *******") return self.username, self.password + + def invalidate_credentials(self) -> None: + if self.username_fixed and self.password_fixed: + raise AuthException("Configured credentials are invalid") + + if not self.username_fixed: + self.username = None + if not self.password_fixed: + self.password = None + + def invalidate_username(self) -> None: + if self.username_fixed: + raise AuthException("Configured username is invalid") + else: + self.username = None + + def invalidate_password(self) -> None: + if self.password_fixed: + raise AuthException("Configured password is invalid") + else: + self.password = None diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 84a7c15..ed3fd9c 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -156,7 +156,7 @@ class KitShibbolethLogin: soup = await self._authenticate_tfa(sess, soup) if not self._login_successful(soup): - self._auth.invalid_credentials() + self._auth.invalidate_credentials() # Equivalent: Being redirected via JS automatically # (or clicking "Continue" if you have JS disabled) From 8c32da7f19ef613b136288a1a8f9a4ab06433c09 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 18:24:03 +0200 Subject: [PATCH 123/524] Let authenticators provide username and password separately --- CONFIG.md | 2 +- 
PFERD/authenticator.py | 8 +++++++ PFERD/authenticators/simple.py | 42 ++++++++++++++++++---------------- PFERD/crawlers/ilias.py | 2 +- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 11c4282..ca6d92b 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -93,7 +93,7 @@ auth = auth:example ## The `auth:*` sections Sections whose names start with `auth:` are used to configure authenticators. An -authenticator provides login credentials to one or more crawlers. +authenticator provides a username and a password to one or more crawlers. Authenticators work similar to crawlers: A section represents an authenticator instance, whose name is the rest of the section name. The type is specified by diff --git a/PFERD/authenticator.py b/PFERD/authenticator.py index b2f6164..7475e2a 100644 --- a/PFERD/authenticator.py +++ b/PFERD/authenticator.py @@ -42,6 +42,14 @@ class Authenticator(ABC): async def credentials(self) -> Tuple[str, str]: pass + async def username(self) -> str: + username, _ = await self.credentials() + return username + + async def password(self) -> str: + _, password = await self.credentials() + return password + def invalidate_credentials(self) -> None: """ Tell the authenticator that some or all of its credentials are invalid. 
diff --git a/PFERD/authenticators/simple.py b/PFERD/authenticators/simple.py index 6ce6265..f21661c 100644 --- a/PFERD/authenticators/simple.py +++ b/PFERD/authenticators/simple.py @@ -24,44 +24,46 @@ class SimpleAuthenticator(Authenticator): ) -> None: super().__init__(name, section, config, conductor) - self.username = section.username() - self.password = section.password() + self._username = section.username() + self._password = section.password() - self.username_fixed = self.username is not None - self.password_fixed = self.password is not None + self._username_fixed = self.username is not None + self._password_fixed = self.password is not None async def credentials(self) -> Tuple[str, str]: - if self.username is not None and self.password is not None: - return self.username, self.password + if self._username is not None and self._password is not None: + return self._username, self._password async with self.conductor.exclusive_output(): - if self.username is None: - self.username = await ainput("Username: ") + if self._username is None: + self._username = await ainput("Username: ") else: print(f"Username: {self.username}") - if self.password is None: - self.password = await agetpass("Password: ") + if self._password is None: + self._password = await agetpass("Password: ") - return self.username, self.password + # Intentionally returned inside the context manager so we know + # they're both not None + return self._username, self._password def invalidate_credentials(self) -> None: - if self.username_fixed and self.password_fixed: + if self._username_fixed and self._password_fixed: raise AuthException("Configured credentials are invalid") - if not self.username_fixed: - self.username = None - if not self.password_fixed: - self.password = None + if not self._username_fixed: + self._username = None + if not self._password_fixed: + self._password = None def invalidate_username(self) -> None: - if self.username_fixed: + if self._username_fixed: raise 
AuthException("Configured username is invalid") else: - self.username = None + self._username = None def invalidate_password(self) -> None: - if self.password_fixed: + if self._password_fixed: raise AuthException("Configured password is invalid") else: - self.password = None + self._password = None diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index ed3fd9c..2352945 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -177,7 +177,7 @@ class KitShibbolethLogin: if not self._tfa_auth: raise RuntimeError("No 'tfa_auth' present but you use two-factor authentication!") - _, tfa_token = await self._tfa_auth.credentials() + tfa_token = await self._tfa_auth.password() # Searching the form here so that this fails before asking for # credentials rather than after asking. From e1104f888d761568e950c70437a72e2168d6c9e2 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 18:27:16 +0200 Subject: [PATCH 124/524] Add tfa authenticator --- CONFIG.md | 6 ++++++ PFERD/authenticators/__init__.py | 5 ++++- PFERD/authenticators/tfa.py | 37 ++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 PFERD/authenticators/tfa.py diff --git a/CONFIG.md b/CONFIG.md index ca6d92b..53c0706 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -131,6 +131,12 @@ via the terminal. - `username`: The username. (Optional) - `password`: The password. (Optional) +### The `tfa` authenticator + +This authenticator prompts the user on the console for a two-factor +authentication token. The token is provided as password and it is not cached. +This authenticator does not support usernames. + ## Transformation rules Transformation rules are rules for renaming and excluding files and directories. 
diff --git a/PFERD/authenticators/__init__.py b/PFERD/authenticators/__init__.py index d021d40..97ff03a 100644 --- a/PFERD/authenticators/__init__.py +++ b/PFERD/authenticators/__init__.py @@ -1,10 +1,11 @@ from configparser import SectionProxy from typing import Callable, Dict -from ..authenticator import Authenticator +from ..authenticator import Authenticator, AuthSection from ..conductor import TerminalConductor from ..config import Config from .simple import SimpleAuthenticator, SimpleAuthSection +from .tfa import TfaAuthenticator AuthConstructor = Callable[[ str, # Name (without the "auth:" prefix) @@ -16,4 +17,6 @@ AuthConstructor = Callable[[ AUTHENTICATORS: Dict[str, AuthConstructor] = { "simple": lambda n, s, c, t: SimpleAuthenticator(n, SimpleAuthSection(s), c, t), + "tfa": lambda n, s, c, t: + TfaAuthenticator(n, AuthSection(s), c, t), } diff --git a/PFERD/authenticators/tfa.py b/PFERD/authenticators/tfa.py new file mode 100644 index 0000000..3513d09 --- /dev/null +++ b/PFERD/authenticators/tfa.py @@ -0,0 +1,37 @@ +from typing import Tuple + +from ..authenticator import Authenticator, AuthException, AuthSection +from ..conductor import TerminalConductor +from ..config import Config +from ..utils import ainput + + +class TfaAuthenticator(Authenticator): + def __init__( + self, + name: str, + section: AuthSection, + config: Config, + conductor: TerminalConductor, + ) -> None: + super().__init__(name, section, config, conductor) + + async def username(self) -> str: + raise AuthException("TFA authenticator does not support usernames") + + async def password(self) -> str: + async with self.conductor.exclusive_output(): + code = await ainput("TFA code: ") + return code + + async def credentials(self) -> Tuple[str, str]: + raise AuthException("TFA authenticator does not support usernames") + + def invalidate_username(self) -> None: + raise AuthException("TFA authenticator does not support usernames") + + def invalidate_password(self) -> None: + pass + + def 
invalidate_credentials(self) -> None: + pass From 1123c8884d54822bf2a285fd9c6c423fa0eb1a2e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 15 May 2021 18:57:17 +0200 Subject: [PATCH 125/524] Implement an IliasPage This allows PFERD to semantically understand ILIAS HTML and is the foundation for the ILIAS crawler. This patch extends the ILIAS crawler to crawl the personal desktop and print the elements on it. --- PFERD/crawlers/ilias.py | 338 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 335 insertions(+), 3 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 2352945..2d9a9c9 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -1,9 +1,15 @@ +import json +import re from configparser import SectionProxy +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum from pathlib import PurePath -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional +from urllib.parse import urljoin, urlparse import aiohttp -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, Tag from PFERD.utils import soupify from ..authenticators import Authenticator @@ -41,6 +47,330 @@ class IliasCrawlerSection(CrawlerSection): return auth +class IliasElementType(Enum): + EXERCISE = "exercise" + FILE = "file" + FOLDER = "folder" + FORUM = "forum" + LINK = "link" + MEETING = "meeting" + VIDEO = "video" + VIDEO_PLAYER = "video_player" + VIDEO_FOLDER = "video_folder" + VIDEO_FOLDER_MAYBE_PAGINATED = "video_folder_maybe_paginated" + + +@dataclass +class IliasPageElement: + type: IliasElementType + url: str + name: str + mtime: Optional[datetime] = None + query_parameter: Dict[str, str] = field(default_factory=dict) + + +class IliasPage: + + def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): + self._soup = soup + self._page_url = _page_url + self._page_type = source_element.type if source_element else None + 
self._source_name = source_element.name if source_element else "" + + def get_child_elements(self) -> List[IliasPageElement]: + """ + Return all child page elements you can find here. + """ + if self._is_video_player(): + return self._player_to_video() + if self._is_video_listing(): + return self._find_video_entries() + return self._find_normal_entries() + + def _is_video_player(self) -> bool: + return "paella_config_file" in str(self._soup) + + def _is_video_listing(self) -> bool: + if self._soup.find(id="headerimage"): + element: Tag = self._soup.find(id="headerimage") + if "opencast" in element.attrs["src"].lower(): + return True + return False + + def _player_to_video(self) -> List[IliasPageElement]: + # Fetch the actual video page. This is a small wrapper page initializing a javscript + # player. Sadly we can not execute that JS. The actual video stream url is nowhere + # on the page, but defined in a JS object inside a script tag, passed to the player + # library. + # We do the impossible and RegEx the stream JSON object out of the page's HTML source + regex: re.Pattern[str] = re.compile( + r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE + ) + json_match = regex.search(str(self._soup)) + + if json_match is None: + print(f"Could not find json stream info for {self._page_url!r}") + return [] + json_str = json_match.group(1) + + # parse it + json_object = json.loads(json_str) + # and fetch the video url! + video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"] + return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + + def _find_video_entries(self) -> List[IliasPageElement]: + # ILIAS has three stages for video pages + # 1. The initial dummy page without any videos. This page contains the link to the listing + # 2. The video listing which might be paginated + # 3. An unpaginated video listing (or at least one that includes 800 videos) + # + # We need to figure out where we are. 
+ + video_element_table: Tag = self._soup.find( + name="table", id=re.compile(r"tbl_xoct_.+") + ) + + if video_element_table is None: + # We are in stage 1 + # The page is actually emtpy but contains the link to stage 2 + content_link: Tag = self._soup.select_one("#tab_series a") + url: str = self._abs_url_from_link(content_link) + query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} + return [IliasPageElement( + IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "", query_parameter=query_params + )] + + is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None + + if is_paginated and not self._page_type == IliasElementType.VIDEO_FOLDER: + # We are in stage 2 - try to break pagination + return self._find_video_entries_paginated() + + return self._find_video_entries_no_paging() + + def _find_video_entries_paginated(self) -> List[IliasPageElement]: + table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) + + if table_element is None: + # TODO: Properly log this + print( + "Could not increase elements per page (table not found)." + " Some might not be crawled!" + ) + return self._find_video_entries_no_paging() + + id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"]) + if id_match is None: + # TODO: Properly log this + print( + "Could not increase elements per page (table id not found)." + " Some might not be crawled!" + ) + return self._find_video_entries_no_paging() + + table_id = id_match.group(1) + + query_params = {f"tbl_xoct_{table_id}_trows": "800", + "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} + return [IliasPageElement( + IliasElementType.VIDEO_FOLDER, self._page_url, "", query_parameter=query_params + )] + + def _find_video_entries_no_paging(self) -> List[IliasPageElement]: + """ + Crawls the "second stage" video page. This page contains the actual video urls. 
+ """ + # Video start links are marked with an "Abspielen" link + video_links: List[Tag] = self._soup.findAll( + name="a", text=re.compile(r"\s*Abspielen\s*") + ) + + results: List[IliasPageElement] = [] + + # TODO: Sadly the download button is currently broken, so never do that + for link in video_links: + results.append(self._listed_video_to_element(link)) + + return results + + def _listed_video_to_element(self, link: Tag) -> IliasPageElement: + # The link is part of a table with multiple columns, describing metadata. + # 6th child (1 indexed) is the modification time string + modification_string = link.parent.parent.parent.select_one( + "td.std:nth-child(6)" + ).getText().strip() + modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") + + title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip() + title += ".mp4" + + video_name: str = _sanitize_path_name(title) + + video_url = self._abs_url_from_link(link) + + return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) + + def _find_normal_entries(self) -> List[IliasPageElement]: + result: List[IliasPageElement] = [] + + # Fetch all links and throw them to the general interpreter + links: List[Tag] = self._soup.select("a.il_ContainerItemTitle") + + for link in links: + abs_url = self._abs_url_from_link(link) + element_name = _sanitize_path_name(link.getText()) + element_type = self._find_type_from_link(element_name, link, abs_url) + + if not element_type: + continue + elif element_type == IliasElementType.MEETING: + element_path = _sanitize_path_name(self._normalize_meeting_name(element_name)) + elif element_type == IliasElementType.FILE: + result.append(self._file_to_element(element_name, abs_url, link)) + continue + + result.append(IliasPageElement(element_type, abs_url, element_name, None)) + + return result + + def _file_to_element(self, name: str, url: str, link_element: Tag) -> IliasPageElement: + # Files have a list 
of properties (type, modification date, size, etc.) + # In a series of divs. + # Find the parent containing all those divs, so we can filter our what we need + properties_parent: Tag = link_element.findParent( + "div", {"class": lambda x: "il_ContainerListItem" in x} + ).select_one(".il_ItemProperties") + # The first one is always the filetype + file_type = properties_parent.select_one("span.il_ItemProperty").getText().strip() + + # The rest does not have a stable order. Grab the whole text and reg-ex the date + # out of it + all_properties_text = properties_parent.getText().strip() + modification_date_match = re.search( + r"(((\d+\. \w+ \d+)|(Gestern|Yesterday)|(Heute|Today)|(Morgen|Tomorrow)), \d+:\d+)", + all_properties_text + ) + if modification_date_match is None: + modification_date = None + # TODO: Properly log this + print(f"Could not extract start date from {all_properties_text!r}") + else: + modification_date_str = modification_date_match.group(1) + modification_date = demangle_date(modification_date_str) + + # Grab the name from the link text + name = _sanitize_path_name(link_element.getText()) + full_path = name + "." + file_type + + return IliasPageElement(IliasElementType.FILE, url, full_path, modification_date) + + @staticmethod + def _find_type_from_link( + element_name: str, + link_element: Tag, + url: str + ) -> Optional[IliasElementType]: + """ + Decides which sub crawler to use for a given top level element. 
+ """ + parsed_url = urlparse(url) + + # file URLs contain "target=file" + if "target=file_" in parsed_url.query: + return IliasElementType.FILE + + # Skip forums + if "cmd=showThreads" in parsed_url.query: + return IliasElementType.FORUM + + # Everything with a ref_id can *probably* be opened to reveal nested things + # video groups, directories, exercises, etc + if "ref_id=" in parsed_url.query: + return IliasPage._find_type_from_folder_like(link_element, url) + + # TODO: Log this properly + print(f"Unknown type: The element was at {str(element_name)!r} and it is {link_element!r})") + return None + + @staticmethod + def _find_type_from_folder_like(link_element: Tag, url: str) -> Optional[IliasElementType]: + """ + Try crawling something that looks like a folder. + """ + # pylint: disable=too-many-return-statements + + found_parent: Optional[Tag] = None + + # We look for the outer div of our inner link, to find information around it + # (mostly the icon) + for parent in link_element.parents: + if "ilContainerListItemOuter" in parent["class"]: + found_parent = parent + break + + if found_parent is None: + # TODO: Log this properly + print(f"Could not find element icon for {url!r}") + return None + + # Find the small descriptive icon to figure out the type + img_tag: Optional[Tag] = found_parent.select_one("img.ilListItemIcon") + + if img_tag is None: + # TODO: Log this properly + print(f"Could not find image tag for {url!r}") + return None + + if "opencast" in str(img_tag["alt"]).lower(): + return IliasElementType.VIDEO_FOLDER + + if str(img_tag["src"]).endswith("icon_exc.svg"): + return IliasElementType.EXERCISE + + if str(img_tag["src"]).endswith("icon_webr.svg"): + return IliasElementType.LINK + + if str(img_tag["src"]).endswith("frm.svg"): + return IliasElementType.FORUM + + if str(img_tag["src"]).endswith("sess.svg"): + return IliasElementType.MEETING + + return IliasElementType.FOLDER + + @staticmethod + def _normalize_meeting_name(meeting_name: str) -> str: 
+ """ + Normalizes meeting names, which have a relative time as their first part, + to their date in ISO format. + """ + date_portion_str = meeting_name.split(" - ")[0] + date_portion = demangle_date(date_portion_str) + + if not date_portion: + return meeting_name + + rest_of_name = meeting_name + if rest_of_name.startswith(date_portion_str): + rest_of_name = rest_of_name[len(date_portion_str):] + + return datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") + rest_of_name + + def _abs_url_from_link(self, link_tag: Tag) -> str: + """ + Create an absolute url from an tag. + """ + return urljoin(self._page_url, link_tag.get("href")) + +def demangle_date(date_str: str) -> Optional[datetime]: + return None + + +def _sanitize_path_name(name: str) -> str: + return name.replace("/", "-").replace("\\", "-").strip() + + class IliasCrawler(HttpCrawler): def __init__( self, @@ -64,7 +394,9 @@ class IliasCrawler(HttpCrawler): async def crawl(self) -> None: async with self.crawl_bar(PurePath("/")) as bar: soup = await self._get_page(self._base_url) - self.print("[green]Gotcha![/]") + page = IliasPage(soup, self._base_url, None) + for element in page.get_child_elements(): + self.print(element.name + " " + str(element.type)) async def _get_page(self, url: str, retries_left: int = 3) -> BeautifulSoup: if retries_left < 0: From c7494e32ce6de3f5b9ab8e717a15bdfd43dbf766 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 15 May 2021 20:42:18 +0200 Subject: [PATCH 126/524] Start implementing crawling in ILIAS crawler The ilias crawler can now crawl quite a few filetypes, splits off folders and crawls them concurrently. 
--- PFERD/crawlers/ilias.py | 182 +++++++++++++++++++++++++++++++++------- 1 file changed, 152 insertions(+), 30 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 2d9a9c9..39c7184 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -1,3 +1,4 @@ +import asyncio import json import re from configparser import SectionProxy @@ -5,8 +6,9 @@ from dataclasses import dataclass, field from datetime import datetime from enum import Enum from pathlib import PurePath -from typing import Any, Dict, List, Optional -from urllib.parse import urljoin, urlparse +from typing import Any, Dict, List, Optional, Set, Union +from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit, + urlunsplit) import aiohttp from bs4 import BeautifulSoup, Tag @@ -18,23 +20,27 @@ from ..config import Config from ..crawler import (Crawler, CrawlerSection, HttpCrawler, anoncritical, arepeat) +TargetType = Union[str, int] + class IliasCrawlerSection(CrawlerSection): - def __init__(self, section: SectionProxy): - super().__init__(section) + def target(self) -> TargetType: + target = self.s.get("target") + if not target: + self.missing_value("target") - if not self.course_id() and not self.element_url(): - self.missing_value("course_id or element_url") + if re.fullmatch(r"\d+", target): + # Course id + return int(target) + if target == "desktop": + # Full personal desktop + return target + if target.startswith("https://ilias.studium.kit.edu"): + # ILIAS URL + return target - def course_id(self) -> Optional[str]: - return self.s.get("course_id") - - def element_url(self) -> Optional[str]: - return self.s.get("element_url") - - def base_url(self) -> str: - return self.s.get("ilias_url", "https://ilias.studium.kit.edu/") + self.invalid_value("target", target, "Should be ") def tfa_auth(self, authenticators: Dict[str, Authenticator]) -> Optional[Authenticator]: value = self.s.get("tfa_auth") @@ -66,7 +72,6 @@ class IliasPageElement: url: str name: 
str mtime: Optional[datetime] = None - query_parameter: Dict[str, str] = field(default_factory=dict) class IliasPage: @@ -91,11 +96,17 @@ class IliasPage: return "paella_config_file" in str(self._soup) def _is_video_listing(self) -> bool: + # ILIAS fluff around it if self._soup.find(id="headerimage"): element: Tag = self._soup.find(id="headerimage") if "opencast" in element.attrs["src"].lower(): return True - return False + + # Raw listing without ILIAS fluff + video_element_table: Tag = self._soup.find( + name="table", id=re.compile(r"tbl_xoct_.+") + ) + return video_element_table is not None def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript @@ -137,9 +148,8 @@ class IliasPage: content_link: Tag = self._soup.select_one("#tab_series a") url: str = self._abs_url_from_link(content_link) query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} - return [IliasPageElement( - IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "", query_parameter=query_params - )] + url = _url_set_query_params(url, query_params) + return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None @@ -173,9 +183,8 @@ class IliasPage: query_params = {f"tbl_xoct_{table_id}_trows": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} - return [IliasPageElement( - IliasElementType.VIDEO_FOLDER, self._page_url, "", query_parameter=query_params - )] + url = _url_set_query_params(self._page_url, query_params) + return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")] def _find_video_entries_no_paging(self) -> List[IliasPageElement]: """ @@ -363,6 +372,7 @@ class IliasPage: """ return urljoin(self._page_url, link_tag.get("href")) + def demangle_date(date_str: str) -> Optional[datetime]: return None @@ -371,6 +381,36 @@ def _sanitize_path_name(name: str) -> str: return name.replace("/", 
"-").replace("\\", "-").strip() +def _url_set_query_param(url: str, param: str, value: str) -> str: + """ + Set a query parameter in an url, overwriting existing ones with the same name. + """ + scheme, netloc, path, query, fragment = urlsplit(url) + query_parameters = parse_qs(query) + query_parameters[param] = [value] + new_query_string = urlencode(query_parameters, doseq=True) + + return urlunsplit((scheme, netloc, path, new_query_string, fragment)) + + +def _url_set_query_params(url: str, params: Dict[str, str]) -> str: + result = url + + for key, val in params.items(): + result = _url_set_query_param(result, key, val) + + return result + + +_DIRECTORY_PAGES: Set[IliasElementType] = set([ + IliasElementType.EXERCISE, + IliasElementType.FOLDER, + IliasElementType.MEETING, + IliasElementType.VIDEO_FOLDER, + IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, +]) + + class IliasCrawler(HttpCrawler): def __init__( self, @@ -386,22 +426,104 @@ class IliasCrawler(HttpCrawler): section.auth(authenticators), section.tfa_auth(authenticators) ) - self._base_url = section.base_url() + self._base_url = "https://ilias.studium.kit.edu" - self._course_id = section.course_id() - self._element_url = section.element_url() + self._target = section.target() async def crawl(self) -> None: - async with self.crawl_bar(PurePath("/")) as bar: - soup = await self._get_page(self._base_url) - page = IliasPage(soup, self._base_url, None) - for element in page.get_child_elements(): - self.print(element.name + " " + str(element.type)) + if isinstance(self._target, int): + await self._crawl_course(self._target) + elif self._target == "desktop": + await self._crawl_desktop() + else: + await self._crawl_url(self._target) + + async def _crawl_course(self, course_id: int) -> None: + # Start crawling at the given course + root_url = _url_set_query_param( + self._base_url + "/goto.php", "target", f"crs_{course_id}" + ) + + await self._crawl_url(root_url, expected_id=course_id) + + async def 
_crawl_desktop(self) -> None: + await self._crawl_url(self._base_url) + + async def _crawl_url(self, url: str, expected_id: Optional[int] = None) -> None: + tasks = [] + + async with self.crawl_bar(PurePath("Root element")): + soup = await self._get_page(url) + + if expected_id is not None: + perma_link_element: Tag = soup.find(id="current_perma_link") + if not perma_link_element or "crs_" not in perma_link_element.get("value"): + # TODO: Properly handle error + raise RuntimeError( + "Invalid course id? I didn't find anything looking like a course!") + + # Duplicated code, but the root page is special - we want to void fetching it twice! + page = IliasPage(soup, url, None) + for child in page.get_child_elements(): + tasks.append(self._handle_ilias_element(PurePath("."), child)) + await asyncio.gather(*tasks) + + async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: + tasks = [] + async with self.crawl_bar(path): + soup = await self._get_page(url) + page = IliasPage(soup, url, parent) + + for child in page.get_child_elements(): + tasks.append(self._handle_ilias_element(path, child)) + + await asyncio.gather(*tasks) + + async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: + element_path = PurePath(parent_path, element.name) + + if element.type == IliasElementType.FILE: + await self._download_element(element, element_path) + elif element.type == IliasElementType.FORUM: + # TODO: Delete + self.print(f"Skipping forum [green]{element_path}[/]") + elif element.type == IliasElementType.LINK: + # TODO: Write in meta-redirect file + self.print(f"Skipping link [green]{element_path}[/]") + elif element.type == IliasElementType.VIDEO: + await self._download_element(element, element_path) + elif element.type == IliasElementType.VIDEO_PLAYER: + # FIXME: Check if we should look at this and if not bail out already! 
+ # This saves us a request for each video, if we skip them anyways + raise RuntimeError("IMPLEMENT ME") + elif element.type in _DIRECTORY_PAGES: + await self._handle_ilias_page(element.url, element, element_path) + else: + # TODO: Proper exception + raise RuntimeError(f"Unknown type: {element.type!r}") + + async def _download_element(self, element: IliasPageElement, element_path: PurePath) -> None: + dl = await self.download(element_path, mtime=element.mtime) + if not dl: + return + + async with self.download_bar(element_path) as bar, dl as sink,\ + self.session.get(element.url) as resp: + + if resp.content_length: + bar.set_total(resp.content_length) + + async for data in resp.content.iter_chunked(1024): + sink.file.write(data) + bar.advance(len(data)) + + sink.done() async def _get_page(self, url: str, retries_left: int = 3) -> BeautifulSoup: if retries_left < 0: # TODO: Proper exception raise RuntimeError("Get page failed too often") + print(url) async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): From 7d323ec62b661c4d3b90460af2f87d200f63047a Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 15 May 2021 21:29:43 +0200 Subject: [PATCH 127/524] Implement video downloads in ilias crawler --- PFERD/crawlers/ilias.py | 55 +++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 39c7184..2f3920c 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -6,12 +6,14 @@ from dataclasses import dataclass, field from datetime import datetime from enum import Enum from pathlib import PurePath -from typing import Any, Dict, List, Optional, Set, Union +# TODO In Python 3.9 and above, AsyncContextManager is deprecated +from typing import Any, AsyncContextManager, Dict, List, Optional, Set, Union from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit, urlunsplit) import aiohttp 
from bs4 import BeautifulSoup, Tag +from PFERD.output_dir import Redownload from PFERD.utils import soupify from ..authenticators import Authenticator @@ -19,6 +21,7 @@ from ..conductor import TerminalConductor from ..config import Config from ..crawler import (Crawler, CrawlerSection, HttpCrawler, anoncritical, arepeat) +from ..output_dir import FileSink TargetType = Union[str, int] @@ -438,6 +441,9 @@ class IliasCrawler(HttpCrawler): else: await self._crawl_url(self._target) + if self.error_free: + await self.cleanup() + async def _crawl_course(self, course_id: int) -> None: # Start crawling at the given course root_url = _url_set_query_param( @@ -483,7 +489,7 @@ class IliasCrawler(HttpCrawler): element_path = PurePath(parent_path, element.name) if element.type == IliasElementType.FILE: - await self._download_element(element, element_path) + await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: # TODO: Delete self.print(f"Skipping forum [green]{element_path}[/]") @@ -491,33 +497,50 @@ class IliasCrawler(HttpCrawler): # TODO: Write in meta-redirect file self.print(f"Skipping link [green]{element_path}[/]") elif element.type == IliasElementType.VIDEO: - await self._download_element(element, element_path) + await self._download_file(element, element_path) elif element.type == IliasElementType.VIDEO_PLAYER: - # FIXME: Check if we should look at this and if not bail out already! 
- # This saves us a request for each video, if we skip them anyways - raise RuntimeError("IMPLEMENT ME") + await self._download_video(element, element_path) elif element.type in _DIRECTORY_PAGES: await self._handle_ilias_page(element.url, element, element_path) else: # TODO: Proper exception raise RuntimeError(f"Unknown type: {element.type!r}") - async def _download_element(self, element: IliasPageElement, element_path: PurePath) -> None: + async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: + # Videos will NOT be redownloaded - their content doesn't really change and they are chunky + dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) + if not dl: + return + + async with self.download_bar(element_path) as bar: + page = IliasPage(await self._get_page(element.url), element.url, element) + real_element = page.get_child_elements()[0] + + async with dl as sink, self.session.get(element.url) as resp: + if resp.content_length: + bar.set_total(resp.content_length) + + async for data in resp.content.iter_chunked(1024): + sink.file.write(data) + bar.advance(len(data)) + + sink.done() + + async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: dl = await self.download(element_path, mtime=element.mtime) if not dl: return - async with self.download_bar(element_path) as bar, dl as sink,\ - self.session.get(element.url) as resp: + async with self.download_bar(element_path) as bar: + async with dl as sink, self.session.get(element.url) as resp: + if resp.content_length: + bar.set_total(resp.content_length) - if resp.content_length: - bar.set_total(resp.content_length) + async for data in resp.content.iter_chunked(1024): + sink.file.write(data) + bar.advance(len(data)) - async for data in resp.content.iter_chunked(1024): - sink.file.write(data) - bar.advance(len(data)) - - sink.done() + sink.done() async def _get_page(self, url: str, retries_left: int = 3) -> 
BeautifulSoup: if retries_left < 0: From c454fabc9db22f8389f3a35c951a2e15bfaee39e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 15 May 2021 21:40:17 +0200 Subject: [PATCH 128/524] Add support for exercises in ILIAS crawler --- PFERD/crawlers/ilias.py | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 2f3920c..e52d329 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -93,6 +93,8 @@ class IliasPage: return self._player_to_video() if self._is_video_listing(): return self._find_video_entries() + if self._is_exercise_file(): + return self._find_exercise_entries() return self._find_normal_entries() def _is_video_player(self) -> bool: @@ -111,6 +113,19 @@ class IliasPage: ) return video_element_table is not None + def _is_exercise_file(self) -> bool: + # we know it from before + if self._page_type == IliasElementType.EXERCISE: + return True + + # We have no suitable parent - let's guesss + if self._soup.find(id="headerimage"): + element: Tag = self._soup.find(id="headerimage") + if "exc" in element.attrs["src"].lower(): + return True + + return False + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. 
The actual video stream url is nowhere @@ -223,6 +238,40 @@ class IliasPage: return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) + def _find_exercise_entries(self) -> List[IliasPageElement]: + results: List[IliasPageElement] = [] + + # Each assignment is in an accordion container + assignment_containers: List[Tag] = self._soup.select(".il_VAccordionInnerContainer") + + for container in assignment_containers: + # Fetch the container name out of the header to use it in the path + container_name = container.select_one(".ilAssignmentHeader").getText().strip() + # Find all download links in the container (this will contain all the files) + files: List[Tag] = container.findAll( + name="a", + # download links contain the given command class + attrs={"href": lambda x: x and "cmdClass=ilexsubmissiongui" in x}, + text="Download" + ) + + # Grab each file as you now have the link + for file_link in files: + # Two divs, side by side. Left is the name, right is the link ==> get left + # sibling + file_name = file_link.parent.findPrevious(name="div").getText().strip() + file_name = _sanitize_path_name(file_name) + url = self._abs_url_from_link(file_link) + + results.append(IliasPageElement( + IliasElementType.FILE, + url, + container_name + "/" + file_name, + None # We do not have any timestamp + )) + + return results + def _find_normal_entries(self) -> List[IliasPageElement]: result: List[IliasPageElement] = [] From 05573ccc53cf4a9e446bf5e010c263670d3002f5 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 21:33:51 +0200 Subject: [PATCH 129/524] Add fancy CLI options --- PFERD/__main__.py | 233 +++++++++++++++++++++++++++++++++++++++----- PFERD/config.py | 13 +-- PFERD/crawler.py | 44 +++------ PFERD/output_dir.py | 16 +++ 4 files changed, 250 insertions(+), 56 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index a16b19b..5815f40 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -1,40 +1,229 @@ 
import argparse import asyncio +import configparser from pathlib import Path from .config import Config, ConfigDumpException, ConfigLoadException +from .output_dir import OnConflict, Redownload from .pferd import Pferd +GENERAL_PARSER = argparse.ArgumentParser(add_help=False) +GENERAL_PARSER.add_argument( + "--config", "-c", + type=Path, + metavar="PATH", + help="custom config file" +) +GENERAL_PARSER.add_argument( + "--dump-config", + nargs="?", + const=True, + metavar="PATH", + help="dump current configuration to a file and exit." + " Uses default config file path if no path is specified" +) +GENERAL_PARSER.add_argument( + "--crawler", + action="append", + type=str, + metavar="NAME", + help="only execute a single crawler." + " Can be specified multiple times to execute multiple crawlers" +) +GENERAL_PARSER.add_argument( + "--working-dir", + type=Path, + metavar="PATH", + help="custom working directory" +) + + +def load_general( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + section = parser[parser.default_section] + + if args.working_dir is not None: + section["working_dir"] = str(args.working_dir) + + +CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) +CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( + title="general crawler arguments", + description="arguments common to all crawlers", +) +CRAWLER_PARSER_GROUP.add_argument( + "--redownload", + type=Redownload.from_string, + metavar="OPTION", + help="when to redownload a file that's already present locally" +) +CRAWLER_PARSER_GROUP.add_argument( + "--on-conflict", + type=OnConflict.from_string, + metavar="OPTION", + help="what to do when local and remote files or directories differ" +) +CRAWLER_PARSER_GROUP.add_argument( + "--transform", "-t", + action="append", + type=str, + metavar="RULE", + help="add a single transformation rule. 
Can be specified multiple times" +) +CRAWLER_PARSER_GROUP.add_argument( + "--max-concurrent-tasks", + type=int, + metavar="N", + help="maximum number of concurrent tasks (crawling, downloading)" +) +CRAWLER_PARSER_GROUP.add_argument( + "--max-concurrent-downloads", + type=int, + metavar="N", + help="maximum number of tasks that may download data at the same time" +) +CRAWLER_PARSER_GROUP.add_argument( + "--delay-between-tasks", + type=float, + metavar="SECONDS", + help="time the crawler should wait between subsequent tasks" +) + + +def load_crawler( + args: argparse.Namespace, + section: configparser.SectionProxy, +) -> None: + if args.redownload is not None: + section["redownload"] = args.redownload.value + if args.on_conflict is not None: + section["on_conflict"] = args.on_conflict.value + if args.transform is not None: + section["transform"] = "\n" + "\n".join(args.transform) + if args.max_concurrent_tasks is not None: + section["max_concurrent_tasks"] = str(args.max_concurrent_tasks) + if args.max_concurrent_downloads is not None: + section["max_concurrent_downloads"] = str(args.max_concurrent_downloads) + if args.delay_between_tasks is not None: + section["delay_between_tasks"] = str(args.delay_between_tasks) + + +PARSER = argparse.ArgumentParser(parents=[GENERAL_PARSER]) +PARSER.set_defaults(command=None) +SUBPARSERS = PARSER.add_subparsers(title="crawlers") + + +LOCAL_CRAWLER = SUBPARSERS.add_parser( + "local", + parents=[GENERAL_PARSER, CRAWLER_PARSER], +) +LOCAL_CRAWLER.set_defaults(command="local") +LOCAL_CRAWLER_GROUP = LOCAL_CRAWLER.add_argument_group( + title="local crawler arguments", + description="arguments for the 'local' crawler", +) +LOCAL_CRAWLER_GROUP.add_argument( + "target", + type=Path, + metavar="TARGET", + help="directory to crawl" +) +LOCAL_CRAWLER_GROUP.add_argument( + "output", + type=Path, + metavar="OUTPUT", + help="output directory" +) +LOCAL_CRAWLER_GROUP.add_argument( + "--crawl-delay", + type=float, + metavar="SECONDS", + 
help="artificial delay to simulate for crawl requests" +) +LOCAL_CRAWLER_GROUP.add_argument( + "--download-delay", + type=float, + metavar="SECONDS", + help="artificial delay to simulate for download requests" +) +LOCAL_CRAWLER_GROUP.add_argument( + "--download-speed", + type=int, + metavar="BYTES_PER_SECOND", + help="download speed to simulate" +) + + +def load_local_crawler( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + parser["crawl:local"] = {} + section = parser["crawl:local"] + load_crawler(args, section) + + section["type"] = "local" + section["target"] = str(args.target) + section["output_dir"] = str(args.output) + if args.crawl_delay is not None: + section["crawl_delay"] = str(args.crawl_delay) + if args.download_delay is not None: + section["download_delay"] = str(args.download_delay) + if args.download_speed is not None: + section["download_speed"] = str(args.download_speed) + + +def load_parser( + args: argparse.Namespace, +) -> configparser.ConfigParser: + parser = configparser.ConfigParser() + + if args.command is None: + Config.load_parser(parser, path=args.config) + elif args.command == "local": + load_local_crawler(args, parser) + + load_general(args, parser) + prune_crawlers(args, parser) + + return parser + + +def prune_crawlers( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + if not args.crawler: + return + + for section in parser.sections(): + if section.startswith("crawl:"): + # TODO Use removeprefix() when switching to 3.9 + name = section[len("crawl:"):] + if name not in args.crawler: + parser.remove_section(section) + + # TODO Check if crawlers actually exist + def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument( - "--config", "-c", - type=Path, - metavar="PATH", - help="specify custom config file path", - ) - parser.add_argument( - "--dump-config", - nargs="?", - const=True, - type=Path, - metavar="PATH", - help="dump current configuration to a file 
and exit." - " Uses default config file path if no path is specified", - ) - args = parser.parse_args() + args = PARSER.parse_args() try: - config_parser = Config.load_parser(args.config) - config = Config(config_parser) + config = Config(load_parser(args)) except ConfigLoadException: exit(1) - if args.dump_config: - path = None if args.dump_config is True else args.dump_config + if args.dump_config is not None: try: - config.dump(path) + if args.dump_config is True: + config.dump() + elif args.dump_config == "-": + config.dump_to_stdout() + else: + config.dump(Path(args.dump_config)) except ConfigDumpException: exit(1) exit() diff --git a/PFERD/config.py b/PFERD/config.py index 7a7e832..7fe5d9e 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -1,4 +1,6 @@ +import asyncio import os +import sys from configparser import ConfigParser, SectionProxy from dataclasses import dataclass from pathlib import Path @@ -68,7 +70,7 @@ class Config: raise ConfigLoadException() @staticmethod - def load_parser(path: Optional[Path] = None) -> ConfigParser: + def load_parser(parser: ConfigParser, path: Optional[Path] = None) -> None: """ May throw a ConfigLoadException. """ @@ -76,8 +78,6 @@ class Config: if not path: path = Config._default_path() - parser = ConfigParser() - # Using config.read_file instead of config.read because config.read # would just ignore a missing file and carry on. 
try: @@ -90,8 +90,6 @@ class Config: except PermissionError: Config._fail_load(path, "Insufficient permissions") - return parser - @staticmethod def _fail_dump(path: Path, reason: str) -> None: print(f"Failed to dump config file to {path}") @@ -123,7 +121,7 @@ class Config: self._parser.write(f) except FileExistsError: print("That file already exists.") - if prompt_yes_no("Overwrite it?", default=False): + if asyncio.run(prompt_yes_no("Overwrite it?", default=False)): with open(path, "w") as f: self._parser.write(f) else: @@ -133,6 +131,9 @@ class Config: except PermissionError: self._fail_dump(path, "Insufficient permissions") + def dump_to_stdout(self) -> None: + self._parser.write(sys.stdout) + @property def default_section(self) -> SectionProxy: return self._parser[self._parser.default_section] diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 4148614..140ae20 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -117,37 +117,25 @@ class CrawlerSection(Section): def redownload(self) -> Redownload: value = self.s.get("redownload", "never-smart") - if value == "never": - return Redownload.NEVER - elif value == "never-smart": - return Redownload.NEVER_SMART - elif value == "always": - return Redownload.ALWAYS - elif value == "always-smart": - return Redownload.ALWAYS_SMART - - self.invalid_value( - "redownload", - value, - "Must be 'never', 'never-smart', 'always' or 'always-smart'" - ) + try: + return Redownload.from_string(value) + except ValueError as e: + self.invalid_value( + "redownload", + value, + str(e).capitalize(), + ) def on_conflict(self) -> OnConflict: value = self.s.get("on_conflict", "prompt") - if value == "prompt": - return OnConflict.PROMPT - elif value == "local-first": - return OnConflict.LOCAL_FIRST - elif value == "remote-first": - return OnConflict.REMOTE_FIRST - elif value == "no-delete": - return OnConflict.NO_DELETE - - self.invalid_value( - "on_conflict", - value, - "Must be 'prompt', 'local-first', 'remote-first' or 
'no-delete'", - ) + try: + return OnConflict.from_string(value) + except ValueError as e: + self.invalid_value( + "on_conflict", + value, + str(e).capitalize(), + ) def transform(self) -> str: return self.s.get("transform", "") diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 89c5839..4f5f708 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -32,6 +32,14 @@ class Redownload(Enum): ALWAYS = "always" ALWAYS_SMART = "always-smart" + @staticmethod + def from_string(string: str) -> "Redownload": + try: + return Redownload(string) + except ValueError: + raise ValueError("must be one of 'never', 'never-smart'," + " 'always', 'always-smart'") + class OnConflict(Enum): PROMPT = "prompt" @@ -39,6 +47,14 @@ class OnConflict(Enum): REMOTE_FIRST = "remote-first" NO_DELETE = "no-delete" + @staticmethod + def from_string(string: str) -> "OnConflict": + try: + return OnConflict(string) + except ValueError: + raise ValueError("must be one of 'prompt', 'local-first'," + " 'remote-first', 'no-delete'") + @dataclass class Heuristics: From 989032fe0c3b90aa5c034657ade7df54b1b2016f Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 22:25:41 +0200 Subject: [PATCH 130/524] Fix cookies getting deleted --- PFERD/output_dir.py | 2 +- PFERD/report.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 4f5f708..fa0944b 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -378,7 +378,7 @@ class OutputDirectory: pass async def _cleanup_file(self, path: Path, pure: PurePath) -> None: - if self._report.marked(pure): + if self._report.is_marked(pure): return if await self._conflict_delete_lf(self._on_conflict, pure): diff --git a/PFERD/report.py b/PFERD/report.py index 2c7d8af..1c46216 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -62,7 +62,7 @@ class Report: detail, see the respective exception's docstring. 
""" - for other in self.known_files & self.reserved_files: + for other in self.marked: if path == other: raise MarkDuplicateException(path) @@ -71,8 +71,12 @@ class Report: self.known_files.add(path) - def marked(self, path: PurePath) -> bool: - return path in self.known_files + @property + def marked(self) -> Set[PurePath]: + return self.known_files | self.reserved_files + + def is_marked(self, path: PurePath) -> bool: + return path in self.marked def add_file(self, path: PurePath) -> None: """ From 9fd356d29044ac4b9a3ad36c464601048338d0b1 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 May 2021 23:00:40 +0200 Subject: [PATCH 131/524] Ensure tmp files are deleted This doesn't seem to fix the case where an exception bubbles up to the top of the event loop. It also doesn't seem to fix the case when a KeyboardInterrupt is thrown, since that never makes its way into the event loop in the first place. Both of these cases lead to the event loop stopping, which means that the tmp file cleanup doesn't get executed even though it's inside a "with" or "finally". 
--- PFERD/output_dir.py | 59 +++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index fa0944b..23d4a31 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -3,13 +3,14 @@ import os import random import shutil import string -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager, contextmanager from dataclasses import dataclass from datetime import datetime from enum import Enum from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import AsyncContextManager, AsyncIterator, BinaryIO, Optional +from typing import (AsyncContextManager, AsyncIterator, BinaryIO, Iterator, + Optional) from rich.markup import escape @@ -327,36 +328,42 @@ class OutputDirectory: mtimestamp = mtime.timestamp() os.utime(info.local_path, times=(mtimestamp, mtimestamp)) + @contextmanager + def _ensure_deleted(self, path: Path) -> Iterator[None]: + try: + yield + finally: + path.unlink(missing_ok=True) + async def _after_download(self, info: DownloadInfo) -> None: - changed = False + with self._ensure_deleted(info.tmp_path): + changed = False - if not info.success: - info.tmp_path.unlink() - return - - # Solve conflicts arising from existing local file - if info.local_path.exists(): - changed = True - if filecmp.cmp(info.local_path, info.tmp_path): - self._update_metadata(info) - info.tmp_path.unlink() + if not info.success: return - if not await self._conflict_lfrf(info.on_conflict, info.path): - info.tmp_path.unlink() - return + # Solve conflicts arising from existing local file + if info.local_path.exists(): + changed = True - info.tmp_path.replace(info.local_path) - self._update_metadata(info) + if filecmp.cmp(info.local_path, info.tmp_path): + self._update_metadata(info) + return - if changed: - self._conductor.print( - f"[bold bright_yellow]Changed[/] {escape(str(info.path))}") - 
self._report.change_file(info.path) - else: - self._conductor.print( - f"[bold bright_green]Added[/] {escape(str(info.path))}") - self._report.add_file(info.path) + if not await self._conflict_lfrf(info.on_conflict, info.path): + return + + info.tmp_path.replace(info.local_path) + self._update_metadata(info) + + if changed: + self._conductor.print( + f"[bold bright_yellow]Changed[/] {escape(str(info.path))}") + self._report.change_file(info.path) + else: + self._conductor.print( + f"[bold bright_green]Added[/] {escape(str(info.path))}") + self._report.add_file(info.path) async def cleanup(self) -> None: await self._cleanup_dir(self._root, PurePath()) From cf6903d109fead73a622351f98d24b05d013e93a Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 15 May 2021 22:46:26 +0200 Subject: [PATCH 132/524] Retry crawling on I/O failure --- PFERD/crawlers/ilias.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index e52d329..b3190c6 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -596,12 +596,15 @@ class IliasCrawler(HttpCrawler): # TODO: Proper exception raise RuntimeError("Get page failed too often") print(url) - async with self.session.get(url) as request: - soup = soupify(await request.read()) - if self._is_logged_in(soup): - return soup + try: + async with self.session.get(url) as request: + soup = soupify(await request.read()) + if self._is_logged_in(soup): + return soup - await self._shibboleth_login.login(self.session) + await self._shibboleth_login.login(self.session) + except Exception: + return await self._get_page(url, retries_left - 1) return await self._get_page(url, retries_left - 1) From 9ec0d3e16ac756e0ce5913f6a1bb30add1985e1f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 16 May 2021 11:54:42 +0200 Subject: [PATCH 133/524] Implement date-demangling in ILIAS crawler --- PFERD/crawlers/ilias.py | 50 +++++++++++++++++++++++++++++++++++++++-- 1 
file changed, 48 insertions(+), 2 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index b3190c6..18d33ff 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -3,7 +3,7 @@ import json import re from configparser import SectionProxy from dataclasses import dataclass, field -from datetime import datetime +from datetime import date, datetime, timedelta from enum import Enum from pathlib import PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated @@ -424,9 +424,55 @@ class IliasPage: """ return urljoin(self._page_url, link_tag.get("href")) +german_months = ['Jan', 'Feb', 'Mär', 'Apr', 'Mai', 'Jun', 'Jul', 'Aug', 'Sep', 'Okt', 'Nov', 'Dez'] +english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] def demangle_date(date_str: str) -> Optional[datetime]: - return None + """ + Demangle a given date in one of the following formats: + "Gestern, HH:MM" + "Heute, HH:MM" + "Morgen, HH:MM" + "dd. mon yyyy, HH:MM + """ + try: + date_str = re.sub(r"\s+", " ", date_str) + date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) + date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) + date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I) + for german, english in zip(german_months, english_months): + date_str = date_str.replace(german, english) + # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" + date_str = date_str.replace(english + ".", english) + + # We now have a nice english String in the format: "dd. 
mmm yyyy, hh:mm" + day_part, time_part = date_str.split(",") + day_str, month_str, year_str = day_part.split(" ") + + day = int(day_str.strip().replace(".", "")) + month = english_months.index(month_str.strip()) + 1 + year = int(year_str.strip()) + + hour_str, minute_str = time_part.split(":") + hour = int(hour_str) + minute = int(minute_str) + + return datetime(year, month, day, hour, minute) + except Exception: + # TODO: Properly log this + print(f"Could not parse date {date_str!r}") + return None + +def _format_date_english(date: date) -> str: + month = english_months[date.month - 1] + return f"{date.day:02d}. {month} {date.year:04d}" + +def _yesterday() -> date: + return date.today() - timedelta(days=1) + + +def _tomorrow() -> date: + return date.today() + timedelta(days=1) def _sanitize_path_name(name: str) -> str: From 1c226c31aae2e4eeac28eb0a8238485b7854098c Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 16 May 2021 13:01:30 +0200 Subject: [PATCH 134/524] Add some repeat annotations to the ILIAS crawler --- PFERD/crawlers/ilias.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 18d33ff..3f09789 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -550,6 +550,7 @@ class IliasCrawler(HttpCrawler): async def _crawl_desktop(self) -> None: await self._crawl_url(self._base_url) + @arepeat(3) async def _crawl_url(self, url: str, expected_id: Optional[int] = None) -> None: tasks = [] @@ -567,8 +568,11 @@ class IliasCrawler(HttpCrawler): page = IliasPage(soup, url, None) for child in page.get_child_elements(): tasks.append(self._handle_ilias_element(PurePath("."), child)) + await asyncio.gather(*tasks) + @arepeat(3) + @anoncritical async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: tasks = [] async with self.crawl_bar(path): @@ -580,6 +584,7 @@ class IliasCrawler(HttpCrawler): await 
asyncio.gather(*tasks) + @anoncritical async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: element_path = PurePath(parent_path, element.name) @@ -601,6 +606,7 @@ class IliasCrawler(HttpCrawler): # TODO: Proper exception raise RuntimeError(f"Unknown type: {element.type!r}") + @arepeat(3) async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: # Videos will NOT be redownloaded - their content doesn't really change and they are chunky dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) @@ -621,6 +627,7 @@ class IliasCrawler(HttpCrawler): sink.done() + @arepeat(3) async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: dl = await self.download(element_path, mtime=element.mtime) if not dl: @@ -638,19 +645,18 @@ class IliasCrawler(HttpCrawler): sink.done() async def _get_page(self, url: str, retries_left: int = 3) -> BeautifulSoup: + # This function will retry itself a few times if it is not logged in - it won't handle + # connection errors if retries_left < 0: # TODO: Proper exception raise RuntimeError("Get page failed too often") print(url) - try: - async with self.session.get(url) as request: - soup = soupify(await request.read()) - if self._is_logged_in(soup): - return soup + async with self.session.get(url) as request: + soup = soupify(await request.read()) + if self._is_logged_in(soup): + return soup - await self._shibboleth_login.login(self.session) - except Exception: - return await self._get_page(url, retries_left - 1) + await self._shibboleth_login.login(self.session) return await self._get_page(url, retries_left - 1) From 5ccb17622e0988a7cc21fe4559041b1b20a92771 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 16 May 2021 13:01:41 +0200 Subject: [PATCH 135/524] Configure pycodestyle to use a max line length of 110 --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg 
index 18ff558..4297032 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,3 +14,6 @@ install_requires = [options.entry_points] console_scripts = pferd = PFERD.__main__:main + +[pycodestyle] +max-line-length = 110 \ No newline at end of file From cd5aa618347b43dd9725718782ad9626f3ec4839 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 16 May 2021 13:17:01 +0200 Subject: [PATCH 136/524] Set max line length for pylint --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index 4297032..288cd3c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,4 +16,7 @@ console_scripts = pferd = PFERD.__main__:main [pycodestyle] +max-line-length = 110 + +[pylint.FORMAT] max-line-length = 110 \ No newline at end of file From 2b6235dc78386a488c48c4704a061c09e3ca5a0e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 16 May 2021 13:17:12 +0200 Subject: [PATCH 137/524] Fix pylint warnings (and 2 found bugs) in ILIAS crawler --- PFERD/crawlers/ilias.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 3f09789..00bb04b 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -1,13 +1,12 @@ import asyncio import json import re -from configparser import SectionProxy -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import date, datetime, timedelta from enum import Enum from pathlib import PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import Any, AsyncContextManager, Dict, List, Optional, Set, Union +from typing import Any, Dict, List, Optional, Set, Union from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit, urlunsplit) @@ -19,9 +18,7 @@ from PFERD.utils import soupify from ..authenticators import Authenticator from ..conductor import TerminalConductor from ..config import Config -from ..crawler import (Crawler, CrawlerSection, HttpCrawler, 
anoncritical, - arepeat) -from ..output_dir import FileSink +from ..crawler import CrawlerSection, HttpCrawler, anoncritical, arepeat TargetType = Union[str, int] @@ -285,8 +282,8 @@ class IliasPage: if not element_type: continue - elif element_type == IliasElementType.MEETING: - element_path = _sanitize_path_name(self._normalize_meeting_name(element_name)) + if element_type == IliasElementType.MEETING: + element_name = _sanitize_path_name(self._normalize_meeting_name(element_name)) elif element_type == IliasElementType.FILE: result.append(self._file_to_element(element_name, abs_url, link)) continue @@ -424,9 +421,11 @@ class IliasPage: """ return urljoin(self._page_url, link_tag.get("href")) + german_months = ['Jan', 'Feb', 'Mär', 'Apr', 'Mai', 'Jun', 'Jul', 'Aug', 'Sep', 'Okt', 'Nov', 'Dez'] english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + def demangle_date(date_str: str) -> Optional[datetime]: """ Demangle a given date in one of the following formats: @@ -463,9 +462,11 @@ def demangle_date(date_str: str) -> Optional[datetime]: print(f"Could not parse date {date_str!r}") return None -def _format_date_english(date: date) -> str: - month = english_months[date.month - 1] - return f"{date.day:02d}. {month} {date.year:04d}" + +def _format_date_english(date_to_format: date) -> str: + month = english_months[date_to_format.month - 1] + return f"{date_to_format.day:02d}. 
{month} {date_to_format.year:04d}" + def _yesterday() -> date: return date.today() - timedelta(days=1) @@ -617,7 +618,7 @@ class IliasCrawler(HttpCrawler): page = IliasPage(await self._get_page(element.url), element.url, element) real_element = page.get_child_elements()[0] - async with dl as sink, self.session.get(element.url) as resp: + async with dl as sink, self.session.get(real_element.url) as resp: if resp.content_length: bar.set_total(resp.content_length) From 467ea3a37eebb56b0cf5ec7c85e43ffd00e6d025 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 16 May 2021 13:26:58 +0200 Subject: [PATCH 138/524] Document ILIAS-Crawler arguments in CONFIG.md --- CONFIG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CONFIG.md b/CONFIG.md index 53c0706..bd24b16 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -120,6 +120,14 @@ crawler simulate a slower, network-based crawler. download requests. (Default: 0.0) - `download_speed`: Download speed (in bytes per second) to simulate. (Optional) +### The `kit-ilias` crawler + +This crawler crawls the KIT ILIAS instance. It performs remote calls to a poor SCC-Server, so you should be nice and use reasonable delays and concurrent requests. +- `target`: The ILIAS element to crawl. 
Can be: + - `desktop` if you want to crawl your personal desktop + - `` if you want to crawl the course with the given id + - `` if you want to crawl a given element by URL (preferably the permanent URL linked at the bottom of an ILIAS page) +- `tfa_auth`: Like `auth` but only used for two-factor authentication ## Authenticator types ### The `simple` authenticator From 8b76ebb3efb5cf674b6ffa024dc65f4e389fdf88 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 16 May 2021 13:28:06 +0200 Subject: [PATCH 139/524] Rename IliasCrawler to KitIliasCrawler --- PFERD/crawlers/__init__.py | 6 +++--- PFERD/crawlers/ilias.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index 0ae2ca3..41733cb 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -5,7 +5,7 @@ from ..authenticator import Authenticator from ..conductor import TerminalConductor from ..config import Config from ..crawler import Crawler -from .ilias import IliasCrawler, IliasCrawlerSection +from .ilias import KitIliasCrawler, KitIliasCrawlerSection from .local import LocalCrawler, LocalCrawlerSection CrawlerConstructor = Callable[[ @@ -19,6 +19,6 @@ CrawlerConstructor = Callable[[ CRAWLERS: Dict[str, CrawlerConstructor] = { "local": lambda n, s, c, t, a: LocalCrawler(n, LocalCrawlerSection(s), c, t), - "ilias": lambda n, s, c, t, a: - IliasCrawler(n, IliasCrawlerSection(s), c, t, a), + "kit-ilias": lambda n, s, c, t, a: + KitIliasCrawler(n, KitIliasCrawlerSection(s), c, t, a), } diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 00bb04b..edb48a8 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -23,7 +23,7 @@ from ..crawler import CrawlerSection, HttpCrawler, anoncritical, arepeat TargetType = Union[str, int] -class IliasCrawlerSection(CrawlerSection): +class KitIliasCrawlerSection(CrawlerSection): def target(self) -> TargetType: target = self.s.get("target") @@ -510,11 
+510,11 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([ ]) -class IliasCrawler(HttpCrawler): +class KitIliasCrawler(HttpCrawler): def __init__( self, name: str, - section: IliasCrawlerSection, + section: KitIliasCrawlerSection, config: Config, conductor: TerminalConductor, authenticators: Dict[str, Authenticator] From 3efec53f51ce46983605225efee70fd10172f0d0 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 16 May 2021 14:31:43 +0200 Subject: [PATCH 140/524] Configure code checking and formatting tools Checking - mypy - flake8 (which uses pyflakes and pycodestyle) Formatting - autopep8 - isort --- DEV.md | 25 ++++++++++++++++++++----- scripts/check | 4 ++++ scripts/format | 4 ++++ scripts/setup | 5 +++++ setup.cfg | 8 ++++---- 5 files changed, 37 insertions(+), 9 deletions(-) create mode 100755 scripts/check create mode 100755 scripts/format create mode 100755 scripts/setup diff --git a/DEV.md b/DEV.md index a679b4a..212cec8 100644 --- a/DEV.md +++ b/DEV.md @@ -15,12 +15,14 @@ environment, run these commands in the same directory as this file: ``` $ python -m venv .venv $ . .venv/bin/activate -$ pip install --editable . +$ ./scripts/setup ``` -After this, you can use PFERD as if it was installed normally. Since PFERD was -installed with `--editable`, there is no need to re-run `pip install` when the -source code is changed. +The setup script installs a few required dependencies and tools. It also +installs PFERD via `pip install --editable .`, which means that you can just run +`pferd` as if it was installed normally. Since PFERD was installed with +`--editable`, there is no need to re-run `pip install` when the source code is +changed. For more details, see [this part of the Python Tutorial][venv-tut] and [this section on "development mode"][ppug-dev]. @@ -29,9 +31,22 @@ For more details, see [this part of the Python Tutorial][venv-tut] and [venv-tut]: "12. 
Virtual Environments and Packages" [ppug-dev]: "Working in “development mode”" +## Checking and formatting the code + +To run a set of checks against the code, run `./scripts/check` in the repo's +root directory. This script will run a few tools installed by `./scripts/setup` +against the entire project. + +To format the code, run `./scripts/format` in the repo's root directory. + +Before committing changes, please make sure the checks return no warnings and +the code is formatted. + ## Contributing When submitting a PR that adds, changes or modifies a feature, please ensure -that the corresponding documentation is updated. +that the corresponding documentation is updated as well. Also, please ensure +that `./scripts/check` returns no warnings and the code has been run through +`./scripts/format`. In your first PR, please add your name to the `LICENSE` file. diff --git a/scripts/check b/scripts/check new file mode 100755 index 0000000..ba767cd --- /dev/null +++ b/scripts/check @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +mypy PFERD +flake8 PFERD diff --git a/scripts/format b/scripts/format new file mode 100755 index 0000000..cc196ae --- /dev/null +++ b/scripts/format @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +autopep8 --recursive --in-place PFERD +isort PFERD diff --git a/scripts/setup b/scripts/setup new file mode 100755 index 0000000..8a5399b --- /dev/null +++ b/scripts/setup @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +pip install --upgrade pip setuptools +pip install --editable . 
+pip install --upgrade mypy flake8 autopep8 isort diff --git a/setup.cfg b/setup.cfg index 288cd3c..f6b64ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,8 +15,8 @@ install_requires = console_scripts = pferd = PFERD.__main__:main -[pycodestyle] -max-line-length = 110 +[flake8] +max_line_length = 110 -[pylint.FORMAT] -max-line-length = 110 \ No newline at end of file +[isort] +line_length = 110 From 0bae0091896ab3b0d7c1d46d0cf333f8e31ecbea Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 16 May 2021 14:32:53 +0200 Subject: [PATCH 141/524] Run formatting tools --- PFERD/crawler.py | 3 +-- PFERD/crawlers/ilias.py | 4 ++-- PFERD/output_dir.py | 3 +-- PFERD/utils.py | 2 ++ 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 140ae20..cb31223 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -3,8 +3,7 @@ from contextlib import asynccontextmanager from datetime import datetime from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import (Any, AsyncContextManager, AsyncIterator, Awaitable, - Callable, Dict, Optional, TypeVar) +from typing import Any, AsyncContextManager, AsyncIterator, Awaitable, Callable, Dict, Optional, TypeVar import aiohttp from rich.markup import escape diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index edb48a8..f2a7656 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -7,11 +7,11 @@ from enum import Enum from pathlib import PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import Any, Dict, List, Optional, Set, Union -from urllib.parse import (parse_qs, urlencode, urljoin, urlparse, urlsplit, - urlunsplit) +from urllib.parse import parse_qs, urlencode, urljoin, urlparse, urlsplit, urlunsplit import aiohttp from bs4 import BeautifulSoup, Tag + from PFERD.output_dir import Redownload from PFERD.utils import soupify diff --git a/PFERD/output_dir.py 
b/PFERD/output_dir.py index 23d4a31..ae69d10 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -9,8 +9,7 @@ from datetime import datetime from enum import Enum from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import (AsyncContextManager, AsyncIterator, BinaryIO, Iterator, - Optional) +from typing import AsyncContextManager, AsyncIterator, BinaryIO, Iterator, Optional from rich.markup import escape diff --git a/PFERD/utils.py b/PFERD/utils.py index d7c61ec..3022ab6 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -25,6 +25,7 @@ async def ainput(prompt: str) -> str: async def agetpass(prompt: str) -> str: return await to_thread(lambda: getpass.getpass(prompt)) + def soupify(data: bytes) -> bs4.BeautifulSoup: """ Parses HTML to a beautifulsoup object. @@ -32,6 +33,7 @@ def soupify(data: bytes) -> bs4.BeautifulSoup: return bs4.BeautifulSoup(data, "html.parser") + async def prompt_yes_no(query: str, default: Optional[bool]) -> bool: """ Asks the user a yes/no question and returns their choice. 
From b8efcc2ca5309fc3d3da6b89fa21761371d4114e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 17 May 2021 21:30:26 +0200 Subject: [PATCH 142/524] Respect filters in ILIAS crawler --- PFERD/crawlers/ilias.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index f2a7656..09bad09 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -589,6 +589,9 @@ class KitIliasCrawler(HttpCrawler): async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: element_path = PurePath(parent_path, element.name) + if not self.should_crawl(element_path): + return + if element.type == IliasElementType.FILE: await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: From db1219d4a9cd8bb0522803c84e7f1e6203a6b262 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 17 May 2021 21:31:22 +0200 Subject: [PATCH 143/524] Create a link file in ILIAS crawler This allows us to crawl links and represent them in the file system. Users can choose between an ILIAS-imitation (that optionally auto-redirects) and a plain text variant. --- CONFIG.md | 6 ++ PFERD/crawlers/ilias.py | 139 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 141 insertions(+), 4 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index bd24b16..6149ef5 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -128,6 +128,12 @@ This crawler crawls the KIT ILIAS instance. It performs remote calls to a poor S - `` if you want to crawl the course with the given id - `` if you want to crawl a given element by URL (preferably the permanent URL linked at the bottom of an ILIAS page) - `tfa_auth`: Like `auth` but only used for two-factor authentication +- `link_file_redirect_delay`: PFERD will create local HTML for external links. + If this property is set to a non-negative value it configures the amount of seconds after which the local HTML + file will redirect you to the link target. 
+- `link_file_plain_text`: If this is set to true, PFERD will generate plain-text files containing only the link + target for external links. If this is false or not specified, PFERD will generate a neat, pretty and functional + HTML page instead. ## Authenticator types ### The `simple` authenticator diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 09bad09..4d81976 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -52,6 +52,12 @@ class KitIliasCrawlerSection(CrawlerSection): self.invalid_value("auth", value, "No such auth section exists") return auth + def link_file_redirect_delay(self) -> int: + return self.s.getint("link_file_redirect_delay", fallback=-1) + + def link_file_use_plaintext(self) -> bool: + return self.s.getboolean("link_file_plain_text", fallback=False) + class IliasElementType(Enum): EXERCISE = "exercise" @@ -72,6 +78,7 @@ class IliasPageElement: url: str name: str mtime: Optional[datetime] = None + description: Optional[str] = None class IliasPage: @@ -279,6 +286,7 @@ class IliasPage: abs_url = self._abs_url_from_link(link) element_name = _sanitize_path_name(link.getText()) element_type = self._find_type_from_link(element_name, link, abs_url) + description = self._find_link_description(link) if not element_type: continue @@ -288,10 +296,19 @@ class IliasPage: result.append(self._file_to_element(element_name, abs_url, link)) continue - result.append(IliasPageElement(element_type, abs_url, element_name, None)) + result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) return result + def _find_link_description(self, link: Tag) -> Optional[str]: + tile: Tag = link.findParent("div", {"class": lambda x: x and "il_ContainerListItem" in x}) + if not tile: + return None + description_element: Tag = tile.find("div", {"class": lambda x: x and "il_Description" in x}) + if not description_element: + return None + return description_element.getText().strip() + def 
_file_to_element(self, name: str, url: str, link_element: Tag) -> IliasPageElement: # Files have a list of properties (type, modification date, size, etc.) # In a series of divs. @@ -528,6 +545,8 @@ class KitIliasCrawler(HttpCrawler): self._base_url = "https://ilias.studium.kit.edu" self._target = section.target() + self._link_file_redirect_delay = section.link_file_redirect_delay() + self._link_file_use_plaintext = section.link_file_use_plaintext() async def crawl(self) -> None: if isinstance(self._target, int): @@ -598,8 +617,7 @@ class KitIliasCrawler(HttpCrawler): # TODO: Delete self.print(f"Skipping forum [green]{element_path}[/]") elif element.type == IliasElementType.LINK: - # TODO: Write in meta-redirect file - self.print(f"Skipping link [green]{element_path}[/]") + await self._download_link(element, element_path) elif element.type == IliasElementType.VIDEO: await self._download_file(element, element_path) elif element.type == IliasElementType.VIDEO_PLAYER: @@ -610,6 +628,30 @@ class KitIliasCrawler(HttpCrawler): # TODO: Proper exception raise RuntimeError(f"Unknown type: {element.type!r}") + @arepeat(3) + async def _download_link(self, element: IliasPageElement, element_path: PurePath) -> None: + dl = await self.download(element_path, mtime=element.mtime) + if not dl: + return + + async with self.download_bar(element_path, 2) as bar: + export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") + async with self.session.get(export_url) as response: + html_page: BeautifulSoup = soupify(await response.read()) + real_url: str = html_page.select_one("a").get("href").strip() + + bar.advance(1) + + async with dl as sink: + content = _link_template_plain if self._link_file_use_plaintext else _link_template_rich + content = content.replace("{{link}}", real_url) + content = content.replace("{{name}}", element.name) + content = content.replace("{{description}}", str(element.description)) + content = content.replace("{{redirect_delay}}", 
str(self._link_file_redirect_delay)) + sink.file.write(content.encode("utf-8")) + bar.advance(1) + sink.done() + @arepeat(3) async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: # Videos will NOT be redownloaded - their content doesn't really change and they are chunky @@ -654,7 +696,7 @@ class KitIliasCrawler(HttpCrawler): if retries_left < 0: # TODO: Proper exception raise RuntimeError("Get page failed too often") - print(url) + print(url, "retries left", retries_left) async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): @@ -792,3 +834,92 @@ class KitShibbolethLogin: async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: async with session.post(url, data=data) as response: return soupify(await response.read()) + +_link_template_plain = "{{link}}" +# flake8: noqa E501 +_link_template_rich = """ + + + + + ILIAS - Link: {{ name}} + + + + + +
+ +
+ +
{{description}}
+
+ +
+ + +""" From 1525aa15a6dd9f09d70af1e1f994ed03fb6cf5db Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 18 May 2021 22:40:28 +0200 Subject: [PATCH 144/524] Fix link template error and use indeterminate progress bar --- PFERD/crawlers/ilias.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 4d81976..014f231 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -634,14 +634,12 @@ class KitIliasCrawler(HttpCrawler): if not dl: return - async with self.download_bar(element_path, 2) as bar: + async with self.download_bar(element_path): export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") async with self.session.get(export_url) as response: html_page: BeautifulSoup = soupify(await response.read()) real_url: str = html_page.select_one("a").get("href").strip() - bar.advance(1) - async with dl as sink: content = _link_template_plain if self._link_file_use_plaintext else _link_template_rich content = content.replace("{{link}}", real_url) @@ -649,7 +647,6 @@ class KitIliasCrawler(HttpCrawler): content = content.replace("{{description}}", str(element.description)) content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) sink.file.write(content.encode("utf-8")) - bar.advance(1) sink.done() @arepeat(3) @@ -842,7 +839,7 @@ _link_template_rich = """ - ILIAS - Link: {{ name}} + ILIAS - Link: {{name}} From 4b68fa771fb89dac8615cca1fec09c4743893342 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 18 May 2021 22:43:46 +0200 Subject: [PATCH 145/524] Move logging logic to singleton - Renamed module and class because "conductor" didn't make a lot of sense - Used singleton approach (there's only one stdout after all) - Redesigned progress bars (now with download speed!) 
--- PFERD/authenticator.py | 3 - PFERD/authenticators/__init__.py | 10 +- PFERD/authenticators/simple.py | 7 +- PFERD/authenticators/tfa.py | 7 +- PFERD/conductor.py | 95 ------------------ PFERD/crawler.py | 48 ++-------- PFERD/crawlers/__init__.py | 10 +- PFERD/crawlers/ilias.py | 6 +- PFERD/crawlers/local.py | 4 +- PFERD/logging.py | 160 +++++++++++++++++++++++++++++++ PFERD/output_dir.py | 21 ++-- PFERD/pferd.py | 17 +--- 12 files changed, 195 insertions(+), 193 deletions(-) delete mode 100644 PFERD/conductor.py create mode 100644 PFERD/logging.py diff --git a/PFERD/authenticator.py b/PFERD/authenticator.py index 7475e2a..d67b263 100644 --- a/PFERD/authenticator.py +++ b/PFERD/authenticator.py @@ -1,7 +1,6 @@ from abc import ABC, abstractmethod from typing import Tuple -from .conductor import TerminalConductor from .config import Config, Section @@ -23,7 +22,6 @@ class Authenticator(ABC): name: str, section: AuthSection, config: Config, - conductor: TerminalConductor, ) -> None: """ Initialize an authenticator from its name and its section in the config @@ -36,7 +34,6 @@ class Authenticator(ABC): """ self.name = name - self.conductor = conductor @abstractmethod async def credentials(self) -> Tuple[str, str]: diff --git a/PFERD/authenticators/__init__.py b/PFERD/authenticators/__init__.py index 97ff03a..35096cf 100644 --- a/PFERD/authenticators/__init__.py +++ b/PFERD/authenticators/__init__.py @@ -2,7 +2,6 @@ from configparser import SectionProxy from typing import Callable, Dict from ..authenticator import Authenticator, AuthSection -from ..conductor import TerminalConductor from ..config import Config from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator @@ -11,12 +10,11 @@ AuthConstructor = Callable[[ str, # Name (without the "auth:" prefix) SectionProxy, # Authenticator's section of global config Config, # Global config - TerminalConductor, # Global conductor instance ], Authenticator] AUTHENTICATORS: Dict[str, 
AuthConstructor] = { - "simple": lambda n, s, c, t: - SimpleAuthenticator(n, SimpleAuthSection(s), c, t), - "tfa": lambda n, s, c, t: - TfaAuthenticator(n, AuthSection(s), c, t), + "simple": lambda n, s, c: + SimpleAuthenticator(n, SimpleAuthSection(s), c), + "tfa": lambda n, s, c: + TfaAuthenticator(n, AuthSection(s), c), } diff --git a/PFERD/authenticators/simple.py b/PFERD/authenticators/simple.py index f21661c..caa0002 100644 --- a/PFERD/authenticators/simple.py +++ b/PFERD/authenticators/simple.py @@ -1,8 +1,8 @@ from typing import Optional, Tuple from ..authenticator import Authenticator, AuthException, AuthSection -from ..conductor import TerminalConductor from ..config import Config +from ..logging import log from ..utils import agetpass, ainput @@ -20,9 +20,8 @@ class SimpleAuthenticator(Authenticator): name: str, section: SimpleAuthSection, config: Config, - conductor: TerminalConductor, ) -> None: - super().__init__(name, section, config, conductor) + super().__init__(name, section, config) self._username = section.username() self._password = section.password() @@ -34,7 +33,7 @@ class SimpleAuthenticator(Authenticator): if self._username is not None and self._password is not None: return self._username, self._password - async with self.conductor.exclusive_output(): + async with log.exclusive_output(): if self._username is None: self._username = await ainput("Username: ") else: diff --git a/PFERD/authenticators/tfa.py b/PFERD/authenticators/tfa.py index 3513d09..b0eef18 100644 --- a/PFERD/authenticators/tfa.py +++ b/PFERD/authenticators/tfa.py @@ -1,8 +1,8 @@ from typing import Tuple from ..authenticator import Authenticator, AuthException, AuthSection -from ..conductor import TerminalConductor from ..config import Config +from ..logging import log from ..utils import ainput @@ -12,15 +12,14 @@ class TfaAuthenticator(Authenticator): name: str, section: AuthSection, config: Config, - conductor: TerminalConductor, ) -> None: - super().__init__(name, 
section, config, conductor) + super().__init__(name, section, config) async def username(self) -> str: raise AuthException("TFA authenticator does not support usernames") async def password(self) -> str: - async with self.conductor.exclusive_output(): + async with log.exclusive_output(): code = await ainput("TFA code: ") return code diff --git a/PFERD/conductor.py b/PFERD/conductor.py deleted file mode 100644 index d50574e..0000000 --- a/PFERD/conductor.py +++ /dev/null @@ -1,95 +0,0 @@ -import asyncio -from contextlib import asynccontextmanager, contextmanager -from types import TracebackType -from typing import AsyncIterator, Iterator, List, Optional, Type - -from rich.console import Console -from rich.progress import Progress, TaskID - - -class ProgressBar: - def __init__(self, progress: Progress, taskid: TaskID): - self._progress = progress - self._taskid = taskid - - def advance(self, amount: float = 1) -> None: - self._progress.advance(self._taskid, advance=amount) - - def set_total(self, total: float) -> None: - self._progress.update(self._taskid, total=total) - self._progress.start_task(self._taskid) - - -class TerminalConductor: - def __init__(self) -> None: - self._stopped = False - self._lock = asyncio.Lock() - self._lines: List[str] = [] - - self._console = Console(highlight=False) - self._progress = Progress(console=self._console) - - async def _start(self) -> None: - for task in self._progress.tasks: - task.visible = True - self._progress.start() - - self._stopped = False - - for line in self._lines: - self.print(line) - self._lines = [] - - async def _stop(self) -> None: - self._stopped = True - - for task in self._progress.tasks: - task.visible = False - self._progress.stop() - - async def __aenter__(self) -> None: - async with self._lock: - await self._start() - - async def __aexit__( - self, - exc_type: Optional[Type[BaseException]], - exc_value: Optional[BaseException], - traceback: Optional[TracebackType], - ) -> Optional[bool]: - async with 
self._lock: - await self._stop() - return None - - def print(self, line: str) -> None: - if self._stopped: - self._lines.append(line) - else: - self._console.print(line) - - @asynccontextmanager - async def exclusive_output(self) -> AsyncIterator[None]: - async with self._lock: - await self._stop() - try: - yield - finally: - await self._start() - - @contextmanager - def progress_bar( - self, - description: str, - total: Optional[float] = None, - ) -> Iterator[ProgressBar]: - if total is None: - # Indeterminate progress bar - taskid = self._progress.add_task(description, start=False) - else: - taskid = self._progress.add_task(description, total=total) - - bar = ProgressBar(self._progress, taskid) - try: - yield bar - finally: - self._progress.remove_task(taskid) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index cb31223..677baa2 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -9,9 +9,9 @@ import aiohttp from rich.markup import escape from .authenticator import Authenticator -from .conductor import ProgressBar, TerminalConductor from .config import Config, Section from .limiter import Limiter +from .logging import ProgressBar, log from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload from .transformer import RuleParseException, Transformer from .version import __version__ @@ -36,7 +36,7 @@ def noncritical(f: Wrapped) -> Wrapped: try: f(self, *args, **kwargs) except Exception as e: - self.print(f"[red]Something went wrong: {escape(str(e))}") + log.print(f"[red]Something went wrong: {escape(str(e))}") self.error_free = False return wrapper # type: ignore @@ -79,7 +79,7 @@ def anoncritical(f: AWrapped) -> AWrapped: try: await f(self, *args, **kwargs) except Exception as e: - self.print(f"[red]Something went wrong: {escape(str(e))}") + log.print(f"[red]Something went wrong: {escape(str(e))}") self.error_free = False return wrapper # type: ignore @@ -182,7 +182,6 @@ class Crawler(ABC): name: str, section: CrawlerSection, config: Config, - 
conductor: TerminalConductor, ) -> None: """ Initialize a crawler from its name and its section in the config file. @@ -194,7 +193,6 @@ class Crawler(ABC): """ self.name = name - self._conductor = conductor self.error_free = True self._limiter = Limiter( @@ -213,34 +211,8 @@ class Crawler(ABC): config.working_dir / section.output_dir(name), section.redownload(), section.on_conflict(), - self._conductor, ) - def print(self, text: str) -> None: - """ - Print rich markup to the terminal. Crawlers *must* use this function to - print things unless they are holding an exclusive output context - manager! Be careful to escape all user-supplied strings. - """ - - self._conductor.print(text) - - def exclusive_output(self) -> AsyncContextManager[None]: - """ - Acquire exclusive rights™ to the terminal output. While this context - manager is held, output such as printing and progress bars from other - threads is suspended and the current thread may do whatever it wants - with the terminal. However, it must return the terminal to its original - state before exiting the context manager. - - No two threads can hold this context manager at the same time. - - Useful for password or confirmation prompts as well as running other - programs while crawling (e. g. to get certain credentials). 
- """ - - return self._conductor.exclusive_output() - @asynccontextmanager async def crawl_bar( self, @@ -249,7 +221,7 @@ class Crawler(ABC): ) -> AsyncIterator[ProgressBar]: desc = f"[bold bright_cyan]Crawling[/] {escape(str(path))}" async with self._limiter.limit_crawl(): - with self._conductor.progress_bar(desc, total=total) as bar: + with log.crawl_bar(desc, total=total) as bar: yield bar @asynccontextmanager @@ -260,7 +232,7 @@ class Crawler(ABC): ) -> AsyncIterator[ProgressBar]: desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}" async with self._limiter.limit_download(): - with self._conductor.progress_bar(desc, total=total) as bar: + with log.download_bar(desc, total=total) as bar: yield bar def should_crawl(self, path: PurePath) -> bool: @@ -289,7 +261,7 @@ class Crawler(ABC): crawler. """ - async with self._conductor: + with log.show_progress(): await self.crawl() @abstractmethod @@ -312,9 +284,8 @@ class HttpCrawler(Crawler): name: str, section: CrawlerSection, config: Config, - conductor: TerminalConductor, ) -> None: - super().__init__(name, section, config, conductor) + super().__init__(name, section, config) self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE) self._output_dir.register_reserved(self.COOKIE_FILE) @@ -340,7 +311,4 @@ class HttpCrawler(Crawler): try: cookie_jar.save(self._cookie_jar_path) except Exception: - self.print( - "[bold red]Warning:[/] Failed to save cookies to " - + escape(str(self.COOKIE_FILE)) - ) + log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index 41733cb..72d6798 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -2,7 +2,6 @@ from configparser import SectionProxy from typing import Callable, Dict from ..authenticator import Authenticator -from ..conductor import TerminalConductor from ..config import Config from ..crawler import Crawler from .ilias import 
KitIliasCrawler, KitIliasCrawlerSection @@ -12,13 +11,12 @@ CrawlerConstructor = Callable[[ str, # Name (without the "crawl:" prefix) SectionProxy, # Crawler's section of global config Config, # Global config - TerminalConductor, # Global conductor instance Dict[str, Authenticator], # Loaded authenticators by name ], Crawler] CRAWLERS: Dict[str, CrawlerConstructor] = { - "local": lambda n, s, c, t, a: - LocalCrawler(n, LocalCrawlerSection(s), c, t), - "kit-ilias": lambda n, s, c, t, a: - KitIliasCrawler(n, KitIliasCrawlerSection(s), c, t, a), + "local": lambda n, s, c, a: + LocalCrawler(n, LocalCrawlerSection(s), c), + "kit-ilias": lambda n, s, c, a: + KitIliasCrawler(n, KitIliasCrawlerSection(s), c, a), } diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index 014f231..beac208 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -16,7 +16,6 @@ from PFERD.output_dir import Redownload from PFERD.utils import soupify from ..authenticators import Authenticator -from ..conductor import TerminalConductor from ..config import Config from ..crawler import CrawlerSection, HttpCrawler, anoncritical, arepeat @@ -533,10 +532,9 @@ class KitIliasCrawler(HttpCrawler): name: str, section: KitIliasCrawlerSection, config: Config, - conductor: TerminalConductor, authenticators: Dict[str, Authenticator] ): - super().__init__(name, section, config, conductor) + super().__init__(name, section, config) self._shibboleth_login = KitShibbolethLogin( section.auth(authenticators), @@ -615,7 +613,7 @@ class KitIliasCrawler(HttpCrawler): await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: # TODO: Delete - self.print(f"Skipping forum [green]{element_path}[/]") + print(f"Skipping forum [green]{element_path}[/]") elif element.type == IliasElementType.LINK: await self._download_link(element, element_path) elif element.type == IliasElementType.VIDEO: diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 
2dde0d4..363107f 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -4,7 +4,6 @@ import random from pathlib import Path, PurePath from typing import Optional -from ..conductor import TerminalConductor from ..config import Config from ..crawler import Crawler, CrawlerSection, anoncritical @@ -44,9 +43,8 @@ class LocalCrawler(Crawler): name: str, section: LocalCrawlerSection, config: Config, - conductor: TerminalConductor, ): - super().__init__(name, section, config, conductor) + super().__init__(name, section, config) self._target = config.working_dir / section.target() self._crawl_delay = section.crawl_delay() diff --git a/PFERD/logging.py b/PFERD/logging.py new file mode 100644 index 0000000..b075d35 --- /dev/null +++ b/PFERD/logging.py @@ -0,0 +1,160 @@ +import asyncio +from contextlib import asynccontextmanager, contextmanager +# TODO In Python 3.9 and above, ContextManager and AsyncContextManager are deprecated +from typing import AsyncIterator, ContextManager, Iterator, List, Optional + +from rich.console import Console, RenderGroup +from rich.live import Live +from rich.markup import escape +from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID, TextColumn, TimeRemainingColumn, + TransferSpeedColumn) +from rich.table import Column + + +class ProgressBar: + def __init__(self, progress: Progress, taskid: TaskID): + self._progress = progress + self._taskid = taskid + + def advance(self, amount: float = 1) -> None: + self._progress.advance(self._taskid, advance=amount) + + def set_total(self, total: float) -> None: + self._progress.update(self._taskid, total=total) + self._progress.start_task(self._taskid) + + +class Log: + def __init__(self) -> None: + self.console = Console(highlight=False) + + self._crawl_progress = Progress( + TextColumn("{task.description}", table_column=Column(ratio=1)), + BarColumn(), + TimeRemainingColumn(), + expand=True, + ) + self._download_progress = Progress( + TextColumn("{task.description}", 
table_column=Column(ratio=1)), + TransferSpeedColumn(), + DownloadColumn(), + BarColumn(), + TimeRemainingColumn(), + expand=True, + ) + + self._live = Live(console=self.console, transient=True) + self._update_live() + + self._showing_progress = False + self._progress_suspended = False + self._lock = asyncio.Lock() + self._lines: List[str] = [] + + # Whether different parts of the output are enabled or disabled + self._enabled_explain = False + self._enabled_action = True + self._enabled_report = True + + def _update_live(self) -> None: + elements = [] + if self._crawl_progress.task_ids: + elements.append(self._crawl_progress) + if self._download_progress.task_ids: + elements.append(self._download_progress) + + group = RenderGroup(*elements) # type: ignore + self._live.update(group) + + def configure(self, explain: bool, action: bool, report: bool) -> None: + self._enabled_explain = explain + self._enabled_action = action + self._enabled_report = report + + @contextmanager + def show_progress(self) -> Iterator[None]: + if self._showing_progress: + raise RuntimeError("Calling 'show_progress' while already showing progress") + + self._showing_progress = True + try: + with self._live: + yield + finally: + self._showing_progress = False + + @asynccontextmanager + async def exclusive_output(self) -> AsyncIterator[None]: + if not self._showing_progress: + raise RuntimeError("Calling 'exclusive_output' while not showing progress") + + async with self._lock: + self._progress_suspended = True + self._live.stop() + try: + yield + finally: + self._live.start() + self._progress_suspended = False + for line in self._lines: + self.print(line) + self._lines = [] + + def print(self, text: str) -> None: + if self._progress_suspended: + self._lines.append(text) + else: + self.console.print(text) + + def explain_topic(self, text: str) -> None: + if self._enabled_explain: + self.print(f"[cyan]{escape(text)}") + + def explain(self, text: str) -> None: + if self._enabled_explain: + 
self.print(f" {escape(text)}") + + def action(self, text: str) -> None: + if self._enabled_action: + self.print(text) + + def report(self, text: str) -> None: + if self._enabled_report: + self.print(text) + + @contextmanager + def _bar( + self, + progress: Progress, + description: str, + total: Optional[float], + ) -> Iterator[ProgressBar]: + if total is None: + # Indeterminate progress bar + taskid = progress.add_task(description, start=False) + else: + taskid = progress.add_task(description, total=total) + self._update_live() + + try: + yield ProgressBar(progress, taskid) + finally: + progress.remove_task(taskid) + self._update_live() + + def crawl_bar( + self, + description: str, + total: Optional[float] = None, + ) -> ContextManager[ProgressBar]: + return self._bar(self._crawl_progress, description, total) + + def download_bar( + self, + description: str, + total: Optional[float] = None, + ) -> ContextManager[ProgressBar]: + return self._bar(self._download_progress, description, total) + + +log = Log() diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index ae69d10..417fa52 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -13,7 +13,7 @@ from typing import AsyncContextManager, AsyncIterator, BinaryIO, Iterator, Optio from rich.markup import escape -from .conductor import TerminalConductor +from .logging import log from .report import MarkConflictException, MarkDuplicateException, Report from .utils import prompt_yes_no @@ -93,12 +93,10 @@ class OutputDirectory: root: Path, redownload: Redownload, on_conflict: OnConflict, - conductor: TerminalConductor, ): self._root = root self._redownload = redownload self._on_conflict = on_conflict - self._conductor = conductor self._report = Report() @@ -176,7 +174,7 @@ class OutputDirectory: path: PurePath, ) -> bool: if on_conflict == OnConflict.PROMPT: - async with self._conductor.exclusive_output(): + async with log.exclusive_output(): prompt = f"Replace {path} with remote file?" 
return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: @@ -195,7 +193,7 @@ class OutputDirectory: path: PurePath, ) -> bool: if on_conflict == OnConflict.PROMPT: - async with self._conductor.exclusive_output(): + async with log.exclusive_output(): prompt = f"Recursively delete {path} and replace with remote file?" return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: @@ -215,7 +213,7 @@ class OutputDirectory: parent: PurePath, ) -> bool: if on_conflict == OnConflict.PROMPT: - async with self._conductor.exclusive_output(): + async with log.exclusive_output(): prompt = f"Delete {parent} so remote file {path} can be downloaded?" return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: @@ -234,7 +232,7 @@ class OutputDirectory: path: PurePath, ) -> bool: if on_conflict == OnConflict.PROMPT: - async with self._conductor.exclusive_output(): + async with log.exclusive_output(): prompt = f"Delete {path}?" 
return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: @@ -356,12 +354,10 @@ class OutputDirectory: self._update_metadata(info) if changed: - self._conductor.print( - f"[bold bright_yellow]Changed[/] {escape(str(info.path))}") + log.action(f"[bold bright_yellow]Changed[/] {escape(str(info.path))}") self._report.change_file(info.path) else: - self._conductor.print( - f"[bold bright_green]Added[/] {escape(str(info.path))}") + log.action(f"[bold bright_green]Added[/] {escape(str(info.path))}") self._report.add_file(info.path) async def cleanup(self) -> None: @@ -390,8 +386,7 @@ class OutputDirectory: if await self._conflict_delete_lf(self._on_conflict, pure): try: path.unlink() - self._conductor.print( - f"[bold bright_magenta]Deleted[/] {escape(str(path))}") + log.action(f"[bold bright_magenta]Deleted[/] {escape(str(path))}") self._report.delete_file(pure) except OSError: pass diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 9154a80..10cd1c2 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -5,7 +5,6 @@ from rich.markup import escape from .authenticator import Authenticator from .authenticators import AUTHENTICATORS -from .conductor import TerminalConductor from .config import Config from .crawler import Crawler from .crawlers import CRAWLERS @@ -18,7 +17,6 @@ class PferdLoadException(Exception): class Pferd: def __init__(self, config: Config): self._config = config - self._conductor = TerminalConductor() self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} @@ -34,12 +32,7 @@ class Pferd: print(f"[red]Error: Unknown authenticator type {t}") continue - authenticator = authenticator_constructor( - name, - section, - self._config, - self._conductor, - ) + authenticator = authenticator_constructor(name, section, self._config) self._authenticators[name] = authenticator if abort: @@ -57,13 +50,7 @@ class Pferd: print(f"[red]Error: Unknown crawler type {t}") continue - crawler = 
crawler_constructor( - name, - section, - self._config, - self._conductor, - self._authenticators, - ) + crawler = crawler_constructor(name, section, self._config, self._authenticators) self._crawlers[name] = crawler if abort: From 38510655007c83b72ef9ff87e4d0640aae87e88c Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 18 May 2021 23:23:40 +0200 Subject: [PATCH 146/524] Fix local crawler's download bars Display the pure path instead of the local path. --- PFERD/crawlers/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 363107f..d4156bc 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -91,7 +91,7 @@ class LocalCrawler(Crawler): if not dl: return - async with self.download_bar(path) as bar: + async with self.download_bar(pure) as bar: await asyncio.sleep(random.uniform( 0.5 * self._download_delay, self._download_delay, From b7a999bc2ea813325fa331c83a862b16aaef46a9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 19 May 2021 13:25:57 +0200 Subject: [PATCH 147/524] Clean up crawler exceptions and (a)noncritical --- PFERD/crawler.py | 136 +++++++++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 65 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 677baa2..96745d1 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -13,11 +13,15 @@ from .config import Config, Section from .limiter import Limiter from .logging import ProgressBar, log from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload -from .transformer import RuleParseException, Transformer +from .transformer import Transformer from .version import __version__ -class CrawlerLoadException(Exception): +class CrawlWarning(Exception): + pass + + +class CrawlError(Exception): pass @@ -26,41 +30,29 @@ Wrapped = TypeVar("Wrapped", bound=Callable[..., None]) def noncritical(f: Wrapped) -> Wrapped: """ - Warning: Must only be applied to member functions of the 
Crawler class! - Catches all exceptions occuring during the function call. If an exception occurs, the crawler's error_free variable is set to False. - """ - def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: - try: - f(self, *args, **kwargs) - except Exception as e: - log.print(f"[red]Something went wrong: {escape(str(e))}") - self.error_free = False - return wrapper # type: ignore - - -def repeat(attempts: int) -> Callable[[Wrapped], Wrapped]: - """ Warning: Must only be applied to member functions of the Crawler class! - - If an exception occurs during the function call, retries the function call - a set amount of times. Exceptions that occur during the last attempt are - not caught and instead passed on upwards. """ - def decorator(f: Wrapped) -> Wrapped: - def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: - for _ in range(attempts - 1): - try: - f(self, *args, **kwargs) - return - except Exception: - pass - f(self, *args, **kwargs) - return wrapper # type: ignore - return decorator + def wrapper(*args: Any, **kwargs: Any) -> None: + if not (args and isinstance(args[0], Crawler)): + raise RuntimeError("@noncritical must only applied to Crawler methods") + + crawler = args[0] + + try: + f(*args, **kwargs) + except CrawlWarning as e: + log.print(f"[bold bright_red]Warning[/] {escape(str(e))}") + crawler.error_free = False + except CrawlError as e: + log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}") + crawler.error_free = False + raise + + return wrapper # type: ignore AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) @@ -69,42 +61,30 @@ AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) def anoncritical(f: AWrapped) -> AWrapped: """ An async version of @noncritical. - Warning: Must only be applied to member functions of the Crawler class! Catches all exceptions occuring during the function call. If an exception occurs, the crawler's error_free variable is set to False. 
- """ - async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: - try: - await f(self, *args, **kwargs) - except Exception as e: - log.print(f"[red]Something went wrong: {escape(str(e))}") - self.error_free = False - return wrapper # type: ignore - - -def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: - """ - An async version of @noncritical. Warning: Must only be applied to member functions of the Crawler class! - - If an exception occurs during the function call, retries the function call - a set amount of times. Exceptions that occur during the last attempt are - not caught and instead passed on upwards. """ - def decorator(f: AWrapped) -> AWrapped: - async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: - for _ in range(attempts - 1): - try: - await f(self, *args, **kwargs) - return - except Exception: - pass - await f(self, *args, **kwargs) - return wrapper # type: ignore - return decorator + async def wrapper(*args: Any, **kwargs: Any) -> None: + if not (args and isinstance(args[0], Crawler)): + raise RuntimeError("@anoncritical must only applied to Crawler methods") + + crawler = args[0] + + try: + await f(*args, **kwargs) + except CrawlWarning as e: + log.print(f"[bold bright_red]Warning[/] {escape(str(e))}") + crawler.error_free = False + except CrawlError as e: + log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}") + crawler.error_free = False + raise + + return wrapper # type: ignore class CrawlerSection(Section): @@ -201,11 +181,7 @@ class Crawler(ABC): task_delay=section.delay_between_tasks(), ) - try: - self._transformer = Transformer(section.transform()) - except RuleParseException as e: - e.pretty_print() - raise CrawlerLoadException() + self._transformer = Transformer(section.transform()) self._output_dir = OutputDirectory( config.working_dir / section.output_dir(name), @@ -312,3 +288,33 @@ class HttpCrawler(Crawler): cookie_jar.save(self._cookie_jar_path) except Exception: log.print(f"[bold 
red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") + + +def repeat(attempts: int) -> Callable[[Wrapped], Wrapped]: + """Deprecated.""" + def decorator(f: Wrapped) -> Wrapped: + def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: + for _ in range(attempts - 1): + try: + f(self, *args, **kwargs) + return + except Exception: + pass + f(self, *args, **kwargs) + return wrapper # type: ignore + return decorator + + +def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: + """Deprecated.""" + def decorator(f: AWrapped) -> AWrapped: + async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: + for _ in range(attempts - 1): + try: + await f(self, *args, **kwargs) + return + except Exception: + pass + await f(self, *args, **kwargs) + return wrapper # type: ignore + return decorator From a7c025fd866132a7c5fd87684c2e56b951b1460e Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 19 May 2021 17:16:23 +0200 Subject: [PATCH 148/524] Implement reusable FileSinkToken for OutputDirectory --- PFERD/output_dir.py | 102 +++++++++++++++++++++++++++++--------------- PFERD/utils.py | 45 ++++++++++++++++++- 2 files changed, 112 insertions(+), 35 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 417fa52..783d6bc 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -3,19 +3,19 @@ import os import random import shutil import string -from contextlib import asynccontextmanager, contextmanager +from contextlib import contextmanager from dataclasses import dataclass from datetime import datetime from enum import Enum from pathlib import Path, PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import AsyncContextManager, AsyncIterator, BinaryIO, Iterator, Optional +from typing import AsyncContextManager, BinaryIO, Iterator, Optional, Tuple from rich.markup import escape from .logging import log from .report import MarkConflictException, MarkDuplicateException, Report -from 
.utils import prompt_yes_no +from .utils import ReusableAsyncContextManager, prompt_yes_no SUFFIX_CHARS = string.ascii_lowercase + string.digits SUFFIX_LENGTH = 6 @@ -87,6 +87,49 @@ class DownloadInfo: success: bool = False +class FileSinkToken(ReusableAsyncContextManager[FileSink]): + # Whenever this class is entered, it creates a new temporary file and + # returns a corresponding FileSink. + # + # When it is exited again, the file is closed and information about the + # download handed back to the OutputDirectory. + + def __init__( + self, + output_dir: "OutputDirectory", + path: PurePath, + local_path: Path, + heuristics: Heuristics, + on_conflict: OnConflict, + ): + super().__init__() + + self._output_dir = output_dir + self._path = path + self._local_path = local_path + self._heuristics = heuristics + self._on_conflict = on_conflict + + async def _on_aenter(self) -> FileSink: + tmp_path, file = await self._output_dir._create_tmp_file(self._local_path) + sink = FileSink(file) + + async def after_download() -> None: + await self._output_dir._after_download(DownloadInfo( + self._path, + self._local_path, + tmp_path, + self._heuristics, + self._on_conflict, + sink.is_done(), + )) + + self._stack.push_async_callback(after_download) + self._stack.enter_context(file) + + return sink + + class OutputDirectory: def __init__( self, @@ -111,11 +154,9 @@ class OutputDirectory: try: self._report.mark(path) except MarkDuplicateException: - msg = "Another file has already been placed here." - raise OutputDirException(msg) + raise OutputDirException("Another file has already been placed here.") except MarkConflictException as e: - msg = f"Collides with other file: {e.collides_with}" - raise OutputDirException(msg) + raise OutputDirException(f"Collides with other file: {e.collides_with}") def resolve(self, path: PurePath) -> Path: """ @@ -123,8 +164,7 @@ class OutputDirectory: """ if ".." 
in path.parts: - msg = f"Path {path} contains forbidden '..'" - raise OutputDirException(msg) + raise OutputDirException(f"Path {path} contains forbidden '..'") return self._root / path def _should_download( @@ -137,6 +177,7 @@ class OutputDirectory: # since we know that the remote is different from the local files. This # includes the case where no local file exists. if not local_path.is_file(): + # TODO Don't download if on_conflict is LOCAL_FIRST or NO_DELETE return True if redownload == Redownload.NEVER: @@ -251,19 +292,24 @@ class OutputDirectory: name = f"{prefix}{base.name}.tmp.{suffix}" return base.parent / name - @asynccontextmanager - async def _sink_context_manager( + async def _create_tmp_file( self, - file: BinaryIO, - info: DownloadInfo, - ) -> AsyncIterator[FileSink]: - sink = FileSink(file) - try: - with file: - yield sink - finally: - info.success = sink.is_done() - await self._after_download(info) + local_path: Path, + ) -> Tuple[Path, BinaryIO]: + """ + May raise an OutputDirException. 
+ """ + + # Create tmp file + for attempt in range(TRIES): + suffix_length = SUFFIX_LENGTH + 2 * attempt + tmp_path = self._tmp_path(local_path, suffix_length) + try: + return tmp_path, open(tmp_path, "xb") + except FileExistsError: + pass # Try again + + raise OutputDirException(f"Failed to create temporary file {tmp_path}") async def download( self, @@ -306,19 +352,7 @@ class OutputDirectory: # Ensure parent directory exists local_path.parent.mkdir(parents=True, exist_ok=True) - # Create tmp file - for attempt in range(TRIES): - suffix_length = SUFFIX_LENGTH + 2 * attempt - tmp_path = self._tmp_path(local_path, suffix_length) - info = DownloadInfo(path, local_path, tmp_path, - heuristics, on_conflict) - try: - file = open(tmp_path, "xb") - return self._sink_context_manager(file, info) - except FileExistsError: - pass # Try again - - return None + return FileSinkToken(self, path, local_path, heuristics, on_conflict) def _update_metadata(self, info: DownloadInfo) -> None: if mtime := info.heuristics.mtime: diff --git a/PFERD/utils.py b/PFERD/utils.py index 3022ab6..0b3d40d 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -2,7 +2,11 @@ import asyncio import contextvars import functools import getpass -from typing import Any, Callable, Optional, TypeVar +import sys +from abc import ABC, abstractmethod +from contextlib import AsyncExitStack +from types import TracebackType +from typing import Any, Callable, Generic, Optional, Type, TypeVar import bs4 @@ -56,3 +60,42 @@ async def prompt_yes_no(query: str, default: Optional[bool]) -> bool: return default print("Please answer with 'y' or 'n'.") + + +class ReusableAsyncContextManager(ABC, Generic[T]): + def __init__(self) -> None: + self._active = False + self._stack = AsyncExitStack() + + @abstractmethod + async def _on_aenter(self) -> T: + pass + + async def __aenter__(self) -> T: + if self._active: + raise RuntimeError("Nested or otherwise concurrent usage is not allowed") + + self._active = True + await 
self._stack.__aenter__() + + # See https://stackoverflow.com/a/13075071 + try: + result: T = await self._on_aenter() + except: # noqa: E722 do not use bare 'except' + if not await self.__aexit__(*sys.exc_info()): + raise + + return result + + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> Optional[bool]: + if not self._active: + raise RuntimeError("__aexit__ called too many times") + + result = await self._stack.__aexit__(exc_type, exc_value, traceback) + self._active = False + return result From 5916626399e920cfa314c84a74f597bc6f305114 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 19 May 2021 17:16:59 +0200 Subject: [PATCH 149/524] Make noqua comment more specific --- PFERD/crawlers/ilias.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias.py index beac208..be3584c 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias.py @@ -831,7 +831,6 @@ async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> Beautifu return soupify(await response.read()) _link_template_plain = "{{link}}" -# flake8: noqa E501 _link_template_rich = """ @@ -917,4 +916,4 @@ _link_template_rich = """ -""" +""" # noqa: E501 line too long From 92886fb8d8104d3a56d370bb0a72a51062bda81a Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 19 May 2021 17:32:23 +0200 Subject: [PATCH 150/524] Implement --version flag --- PFERD/__main__.py | 10 ++++++++++ PFERD/crawler.py | 4 ++-- PFERD/version.py | 3 ++- setup.cfg | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 5815f40..54228a5 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -6,8 +6,14 @@ from pathlib import Path from .config import Config, ConfigDumpException, ConfigLoadException from .output_dir import OnConflict, Redownload from .pferd import Pferd +from .version import NAME, VERSION 
GENERAL_PARSER = argparse.ArgumentParser(add_help=False) +GENERAL_PARSER.add_argument( + "--version", + action="store_true", + help="print version and exit" +) GENERAL_PARSER.add_argument( "--config", "-c", type=Path, @@ -211,6 +217,10 @@ def prune_crawlers( def main() -> None: args = PARSER.parse_args() + if args.version: + print(f"{NAME} {VERSION}") + exit() + try: config = Config(load_parser(args)) except ConfigLoadException: diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 96745d1..adfe74b 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -14,7 +14,7 @@ from .limiter import Limiter from .logging import ProgressBar, log from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload from .transformer import Transformer -from .version import __version__ +from .version import NAME, VERSION class CrawlWarning(Exception): @@ -275,7 +275,7 @@ class HttpCrawler(Crawler): pass async with aiohttp.ClientSession( - headers={"User-Agent": f"pferd/{__version__}"}, + headers={"User-Agent": f"{NAME}/{VERSION}"}, cookie_jar=cookie_jar, ) as session: self.session = session diff --git a/PFERD/version.py b/PFERD/version.py index 528787c..e26dabb 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1 +1,2 @@ -__version__ = "3.0.0" +NAME = "PFERD" +VERSION = "3.0.0" diff --git a/setup.cfg b/setup.cfg index f6b64ea..cb85ab0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = PFERD -version = attr: PFERD.version.__version__ +version = attr: PFERD.version.VERSION [options] packages = PFERD From 0d10752b5a9f68d2f0bd97ac5003bf2690027d58 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 19 May 2021 17:48:51 +0200 Subject: [PATCH 151/524] Configure explain log level via cli and config file --- CONFIG.md | 2 ++ PFERD/__main__.py | 19 +++++++++++++++++++ PFERD/config.py | 23 ++++++++++++++--------- PFERD/crawler.py | 2 +- PFERD/crawlers/local.py | 2 +- PFERD/logging.py | 19 +++++++------------ 6 files changed, 44 insertions(+), 23 deletions(-) 
diff --git a/CONFIG.md b/CONFIG.md index 6149ef5..29fc7e2 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -19,6 +19,8 @@ default values for the other sections. paths in the config file are interpreted relative to this path. If this path is relative, it is interpreted relative to the script's working dir. `~` is expanded to the current user's home directory. (Default: `.`) +- `explain`: Whether PFERD should log and explain its actions and decisions in + detail. (Default: `no`) ## The `crawl:*` sections diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 54228a5..589c12d 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -4,6 +4,7 @@ import configparser from pathlib import Path from .config import Config, ConfigDumpException, ConfigLoadException +from .logging import log from .output_dir import OnConflict, Redownload from .pferd import Pferd from .version import NAME, VERSION @@ -42,6 +43,13 @@ GENERAL_PARSER.add_argument( metavar="PATH", help="custom working directory" ) +GENERAL_PARSER.add_argument( + "--explain", "-e", + # TODO Use argparse.BooleanOptionalAction after updating to 3.9 + action="store_const", + const=True, + help="log and explain in detail what PFERD is doing" +) def load_general( @@ -52,6 +60,8 @@ def load_general( if args.working_dir is not None: section["working_dir"] = str(args.working_dir) + if args.explain is not None: + section["explain"] = "true" if args.explain else "false" CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) @@ -217,6 +227,10 @@ def prune_crawlers( def main() -> None: args = PARSER.parse_args() + # Configure log levels set by command line arguments + if args.explain is not None: + log.output_explain = args.explain + if args.version: print(f"{NAME} {VERSION}") exit() @@ -226,6 +240,11 @@ def main() -> None: except ConfigLoadException: exit(1) + # Configure log levels set in the config file + # TODO Catch config section exceptions + if args.explain is None: + log.output_explain = config.default_section.explain() + 
if args.dump_config is not None: try: if args.dump_config is True: diff --git a/PFERD/config.py b/PFERD/config.py index 7fe5d9e..08beb0c 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -50,6 +50,15 @@ class Section: self.error(key, "Missing value") +class DefaultSection(Section): + def working_dir(self) -> Path: + pathstr = self.s.get("working_dir", ".") + return Path(pathstr).expanduser() + + def explain(self) -> bool: + return self.s.getboolean("explain", fallback=False) + + class Config: @staticmethod def _default_path() -> Path: @@ -62,6 +71,11 @@ class Config: def __init__(self, parser: ConfigParser): self._parser = parser + self._default_section = DefaultSection(parser[parser.default_section]) + + @property + def default_section(self) -> DefaultSection: + return self._default_section @staticmethod def _fail_load(path: Path, reason: str) -> None: @@ -134,10 +148,6 @@ class Config: def dump_to_stdout(self) -> None: self._parser.write(sys.stdout) - @property - def default_section(self) -> SectionProxy: - return self._parser[self._parser.default_section] - def crawler_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] for name, proxy in self._parser.items(): @@ -153,8 +163,3 @@ class Config: result.append((name, proxy)) return result - - @property - def working_dir(self) -> Path: - pathstr = self.default_section.get("working_dir", ".") - return Path(pathstr).expanduser() diff --git a/PFERD/crawler.py b/PFERD/crawler.py index adfe74b..80ecedb 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -184,7 +184,7 @@ class Crawler(ABC): self._transformer = Transformer(section.transform()) self._output_dir = OutputDirectory( - config.working_dir / section.output_dir(name), + config.default_section.working_dir() / section.output_dir(name), section.redownload(), section.on_conflict(), ) diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index d4156bc..8cfc79a 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -46,7 +46,7 
@@ class LocalCrawler(Crawler): ): super().__init__(name, section, config) - self._target = config.working_dir / section.target() + self._target = config.default_section.working_dir() / section.target() self._crawl_delay = section.crawl_delay() self._download_delay = section.download_delay() self._download_speed = section.download_speed() diff --git a/PFERD/logging.py b/PFERD/logging.py index b075d35..cedc5c9 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -52,9 +52,9 @@ class Log: self._lines: List[str] = [] # Whether different parts of the output are enabled or disabled - self._enabled_explain = False - self._enabled_action = True - self._enabled_report = True + self.output_explain = False + self.output_action = True + self.output_report = True def _update_live(self) -> None: elements = [] @@ -66,11 +66,6 @@ class Log: group = RenderGroup(*elements) # type: ignore self._live.update(group) - def configure(self, explain: bool, action: bool, report: bool) -> None: - self._enabled_explain = explain - self._enabled_action = action - self._enabled_report = report - @contextmanager def show_progress(self) -> Iterator[None]: if self._showing_progress: @@ -107,19 +102,19 @@ class Log: self.console.print(text) def explain_topic(self, text: str) -> None: - if self._enabled_explain: + if self.output_explain: self.print(f"[cyan]{escape(text)}") def explain(self, text: str) -> None: - if self._enabled_explain: + if self.output_explain: self.print(f" {escape(text)}") def action(self, text: str) -> None: - if self._enabled_action: + if self.output_action: self.print(text) def report(self, text: str) -> None: - if self._enabled_report: + if self.output_report: self.print(text) @contextmanager From 3300886120a0a21127c69f7eaf5af0cb246cae24 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 19 May 2021 18:10:17 +0200 Subject: [PATCH 152/524] Explain config file loading --- PFERD/__main__.py | 16 ++++++++++++---- PFERD/config.py | 23 ++++++++++++----------- PFERD/logging.py | 9 
+++++++++ 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 589c12d..c03e08c 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -133,7 +133,7 @@ SUBPARSERS = PARSER.add_subparsers(title="crawlers") LOCAL_CRAWLER = SUBPARSERS.add_parser( "local", - parents=[GENERAL_PARSER, CRAWLER_PARSER], + parents=[CRAWLER_PARSER], ) LOCAL_CRAWLER.set_defaults(command="local") LOCAL_CRAWLER_GROUP = LOCAL_CRAWLER.add_argument_group( @@ -194,12 +194,16 @@ def load_local_crawler( def load_parser( args: argparse.Namespace, ) -> configparser.ConfigParser: + log.explain_topic("Loading config") parser = configparser.ConfigParser() if args.command is None: + log.explain("No CLI command specified, loading config from file") Config.load_parser(parser, path=args.config) - elif args.command == "local": - load_local_crawler(args, parser) + else: + log.explain(f"CLI command specified, creating config for {args.command!r}") + if args.command == "local": + load_local_crawler(args, parser) load_general(args, parser) prune_crawlers(args, parser) @@ -230,6 +234,8 @@ def main() -> None: # Configure log levels set by command line arguments if args.explain is not None: log.output_explain = args.explain + if args.dump_config: + log.output_explain = False if args.version: print(f"{NAME} {VERSION}") @@ -237,7 +243,9 @@ def main() -> None: try: config = Config(load_parser(args)) - except ConfigLoadException: + except ConfigLoadException as e: + log.error(f"Failed to load config file at path {str(e.path)!r}") + log.error_contd(f"Reason: {e.reason}") exit(1) # Configure log levels set in the config file diff --git a/PFERD/config.py b/PFERD/config.py index 08beb0c..30ae3fb 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -6,11 +6,14 @@ from dataclasses import dataclass from pathlib import Path from typing import Any, List, NoReturn, Optional, Tuple +from .logging import log from .utils import prompt_yes_no +@dataclass class 
ConfigLoadException(Exception): - pass + path: Path + reason: str class ConfigDumpException(Exception): @@ -77,20 +80,18 @@ class Config: def default_section(self) -> DefaultSection: return self._default_section - @staticmethod - def _fail_load(path: Path, reason: str) -> None: - print(f"Failed to load config file at {path}") - print(f"Reason: {reason}") - raise ConfigLoadException() - @staticmethod def load_parser(parser: ConfigParser, path: Optional[Path] = None) -> None: """ May throw a ConfigLoadException. """ - if not path: + if path: + log.explain("Using custom path") + else: + log.explain("Using default path") path = Config._default_path() + log.explain(f"Loading {str(path)!r}") # Using config.read_file instead of config.read because config.read # would just ignore a missing file and carry on. @@ -98,11 +99,11 @@ class Config: with open(path) as f: parser.read_file(f, source=str(path)) except FileNotFoundError: - Config._fail_load(path, "File does not exist") + raise ConfigLoadException(path, "File does not exist") except IsADirectoryError: - Config._fail_load(path, "That's a directory, not a file") + raise ConfigLoadException(path, "That's a directory, not a file") except PermissionError: - Config._fail_load(path, "Insufficient permissions") + raise ConfigLoadException(path, "Insufficient permissions") @staticmethod def _fail_dump(path: Path, reason: str) -> None: diff --git a/PFERD/logging.py b/PFERD/logging.py index cedc5c9..e2a6d33 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -101,6 +101,15 @@ class Log: else: self.console.print(text) + def warn(self, text: str) -> None: + self.print(f"[bold bright_red]Warning[/] {escape(text)}") + + def error(self, text: str) -> None: + self.print(f"[bold bright_red]Error[/] [red]{escape(text)}") + + def error_contd(self, text: str) -> None: + self.print(f"[red]{escape(text)}") + def explain_topic(self, text: str) -> None: if self.output_explain: self.print(f"[cyan]{escape(text)}") From 
9f03702e69a9f09a8d7df6ad49378d3f15ae7bf4 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 May 2021 21:34:36 +0200 Subject: [PATCH 153/524] Split up ilias crawler in multiple files The ilias crawler contained a crawler and an HTML parser, now they are split in two. --- PFERD/crawlers/ilias/__init__.py | 3 + PFERD/crawlers/ilias/kit_ilias_html.py | 452 +++++++++++++++++ .../kit_web_ilias_crawler.py} | 477 +----------------- PFERD/utils.py | 27 +- 4 files changed, 488 insertions(+), 471 deletions(-) create mode 100644 PFERD/crawlers/ilias/__init__.py create mode 100644 PFERD/crawlers/ilias/kit_ilias_html.py rename PFERD/crawlers/{ilias.py => ilias/kit_web_ilias_crawler.py} (51%) diff --git a/PFERD/crawlers/ilias/__init__.py b/PFERD/crawlers/ilias/__init__.py new file mode 100644 index 0000000..15b8d5d --- /dev/null +++ b/PFERD/crawlers/ilias/__init__.py @@ -0,0 +1,3 @@ +from .kit_web_ilias_crawler import KitIliasCrawler, KitIliasCrawlerSection + +__all__ = ["KitIliasCrawler", "KitIliasCrawlerSection"] diff --git a/PFERD/crawlers/ilias/kit_ilias_html.py b/PFERD/crawlers/ilias/kit_ilias_html.py new file mode 100644 index 0000000..17eb855 --- /dev/null +++ b/PFERD/crawlers/ilias/kit_ilias_html.py @@ -0,0 +1,452 @@ +import json +import re +from dataclasses import dataclass +from datetime import date, datetime, timedelta +from enum import Enum +# TODO In Python 3.9 and above, AsyncContextManager is deprecated +from typing import List, Optional, Union +from urllib.parse import urljoin, urlparse + +from bs4 import BeautifulSoup, Tag + +from PFERD.utils import url_set_query_params + +TargetType = Union[str, int] + + +class IliasElementType(Enum): + EXERCISE = "exercise" + FILE = "file" + FOLDER = "folder" + FORUM = "forum" + LINK = "link" + MEETING = "meeting" + VIDEO = "video" + VIDEO_PLAYER = "video_player" + VIDEO_FOLDER = "video_folder" + VIDEO_FOLDER_MAYBE_PAGINATED = "video_folder_maybe_paginated" + + +@dataclass +class IliasPageElement: + type: 
IliasElementType + url: str + name: str + mtime: Optional[datetime] = None + description: Optional[str] = None + + +class IliasPage: + + def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): + self._soup = soup + self._page_url = _page_url + self._page_type = source_element.type if source_element else None + self._source_name = source_element.name if source_element else "" + + def get_child_elements(self) -> List[IliasPageElement]: + """ + Return all child page elements you can find here. + """ + if self._is_video_player(): + return self._player_to_video() + if self._is_video_listing(): + return self._find_video_entries() + if self._is_exercise_file(): + return self._find_exercise_entries() + return self._find_normal_entries() + + def _is_video_player(self) -> bool: + return "paella_config_file" in str(self._soup) + + def _is_video_listing(self) -> bool: + # ILIAS fluff around it + if self._soup.find(id="headerimage"): + element: Tag = self._soup.find(id="headerimage") + if "opencast" in element.attrs["src"].lower(): + return True + + # Raw listing without ILIAS fluff + video_element_table: Tag = self._soup.find( + name="table", id=re.compile(r"tbl_xoct_.+") + ) + return video_element_table is not None + + def _is_exercise_file(self) -> bool: + # we know it from before + if self._page_type == IliasElementType.EXERCISE: + return True + + # We have no suitable parent - let's guesss + if self._soup.find(id="headerimage"): + element: Tag = self._soup.find(id="headerimage") + if "exc" in element.attrs["src"].lower(): + return True + + return False + + def _player_to_video(self) -> List[IliasPageElement]: + # Fetch the actual video page. This is a small wrapper page initializing a javscript + # player. Sadly we can not execute that JS. The actual video stream url is nowhere + # on the page, but defined in a JS object inside a script tag, passed to the player + # library. 
+ # We do the impossible and RegEx the stream JSON object out of the page's HTML source + regex: re.Pattern[str] = re.compile( + r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE + ) + json_match = regex.search(str(self._soup)) + + if json_match is None: + print(f"Could not find json stream info for {self._page_url!r}") + return [] + json_str = json_match.group(1) + + # parse it + json_object = json.loads(json_str) + # and fetch the video url! + video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"] + return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + + def _find_video_entries(self) -> List[IliasPageElement]: + # ILIAS has three stages for video pages + # 1. The initial dummy page without any videos. This page contains the link to the listing + # 2. The video listing which might be paginated + # 3. An unpaginated video listing (or at least one that includes 800 videos) + # + # We need to figure out where we are. + + video_element_table: Tag = self._soup.find( + name="table", id=re.compile(r"tbl_xoct_.+") + ) + + if video_element_table is None: + # We are in stage 1 + # The page is actually emtpy but contains the link to stage 2 + content_link: Tag = self._soup.select_one("#tab_series a") + url: str = self._abs_url_from_link(content_link) + query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} + url = url_set_query_params(url, query_params) + return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] + + is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None + + if is_paginated and not self._page_type == IliasElementType.VIDEO_FOLDER: + # We are in stage 2 - try to break pagination + return self._find_video_entries_paginated() + + return self._find_video_entries_no_paging() + + def _find_video_entries_paginated(self) -> List[IliasPageElement]: + table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) + + if 
table_element is None: + # TODO: Properly log this + print( + "Could not increase elements per page (table not found)." + " Some might not be crawled!" + ) + return self._find_video_entries_no_paging() + + id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"]) + if id_match is None: + # TODO: Properly log this + print( + "Could not increase elements per page (table id not found)." + " Some might not be crawled!" + ) + return self._find_video_entries_no_paging() + + table_id = id_match.group(1) + + query_params = {f"tbl_xoct_{table_id}_trows": "800", + "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} + url = url_set_query_params(self._page_url, query_params) + return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")] + + def _find_video_entries_no_paging(self) -> List[IliasPageElement]: + """ + Crawls the "second stage" video page. This page contains the actual video urls. + """ + # Video start links are marked with an "Abspielen" link + video_links: List[Tag] = self._soup.findAll( + name="a", text=re.compile(r"\s*Abspielen\s*") + ) + + results: List[IliasPageElement] = [] + + # TODO: Sadly the download button is currently broken, so never do that + for link in video_links: + results.append(self._listed_video_to_element(link)) + + return results + + def _listed_video_to_element(self, link: Tag) -> IliasPageElement: + # The link is part of a table with multiple columns, describing metadata. 
+ # 6th child (1 indexed) is the modification time string + modification_string = link.parent.parent.parent.select_one( + "td.std:nth-child(6)" + ).getText().strip() + modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") + + title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip() + title += ".mp4" + + video_name: str = _sanitize_path_name(title) + + video_url = self._abs_url_from_link(link) + + return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) + + def _find_exercise_entries(self) -> List[IliasPageElement]: + results: List[IliasPageElement] = [] + + # Each assignment is in an accordion container + assignment_containers: List[Tag] = self._soup.select(".il_VAccordionInnerContainer") + + for container in assignment_containers: + # Fetch the container name out of the header to use it in the path + container_name = container.select_one(".ilAssignmentHeader").getText().strip() + # Find all download links in the container (this will contain all the files) + files: List[Tag] = container.findAll( + name="a", + # download links contain the given command class + attrs={"href": lambda x: x and "cmdClass=ilexsubmissiongui" in x}, + text="Download" + ) + + # Grab each file as you now have the link + for file_link in files: + # Two divs, side by side. 
Left is the name, right is the link ==> get left + # sibling + file_name = file_link.parent.findPrevious(name="div").getText().strip() + file_name = _sanitize_path_name(file_name) + url = self._abs_url_from_link(file_link) + + results.append(IliasPageElement( + IliasElementType.FILE, + url, + container_name + "/" + file_name, + None # We do not have any timestamp + )) + + return results + + def _find_normal_entries(self) -> List[IliasPageElement]: + result: List[IliasPageElement] = [] + + # Fetch all links and throw them to the general interpreter + links: List[Tag] = self._soup.select("a.il_ContainerItemTitle") + + for link in links: + abs_url = self._abs_url_from_link(link) + element_name = _sanitize_path_name(link.getText()) + element_type = self._find_type_from_link(element_name, link, abs_url) + description = self._find_link_description(link) + + if not element_type: + continue + if element_type == IliasElementType.MEETING: + element_name = _sanitize_path_name(self._normalize_meeting_name(element_name)) + elif element_type == IliasElementType.FILE: + result.append(self._file_to_element(element_name, abs_url, link)) + continue + + result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) + + return result + + def _find_link_description(self, link: Tag) -> Optional[str]: + tile: Tag = link.findParent("div", {"class": lambda x: x and "il_ContainerListItem" in x}) + if not tile: + return None + description_element: Tag = tile.find("div", {"class": lambda x: x and "il_Description" in x}) + if not description_element: + return None + return description_element.getText().strip() + + def _file_to_element(self, name: str, url: str, link_element: Tag) -> IliasPageElement: + # Files have a list of properties (type, modification date, size, etc.) + # In a series of divs. 
+ # Find the parent containing all those divs, so we can filter our what we need + properties_parent: Tag = link_element.findParent( + "div", {"class": lambda x: "il_ContainerListItem" in x} + ).select_one(".il_ItemProperties") + # The first one is always the filetype + file_type = properties_parent.select_one("span.il_ItemProperty").getText().strip() + + # The rest does not have a stable order. Grab the whole text and reg-ex the date + # out of it + all_properties_text = properties_parent.getText().strip() + modification_date_match = re.search( + r"(((\d+\. \w+ \d+)|(Gestern|Yesterday)|(Heute|Today)|(Morgen|Tomorrow)), \d+:\d+)", + all_properties_text + ) + if modification_date_match is None: + modification_date = None + # TODO: Properly log this + print(f"Could not extract start date from {all_properties_text!r}") + else: + modification_date_str = modification_date_match.group(1) + modification_date = demangle_date(modification_date_str) + + # Grab the name from the link text + name = _sanitize_path_name(link_element.getText()) + full_path = name + "." + file_type + + return IliasPageElement(IliasElementType.FILE, url, full_path, modification_date) + + @staticmethod + def _find_type_from_link( + element_name: str, + link_element: Tag, + url: str + ) -> Optional[IliasElementType]: + """ + Decides which sub crawler to use for a given top level element. 
+ """ + parsed_url = urlparse(url) + + # file URLs contain "target=file" + if "target=file_" in parsed_url.query: + return IliasElementType.FILE + + # Skip forums + if "cmd=showThreads" in parsed_url.query: + return IliasElementType.FORUM + + # Everything with a ref_id can *probably* be opened to reveal nested things + # video groups, directories, exercises, etc + if "ref_id=" in parsed_url.query: + return IliasPage._find_type_from_folder_like(link_element, url) + + # TODO: Log this properly + print(f"Unknown type: The element was at {str(element_name)!r} and it is {link_element!r})") + return None + + @staticmethod + def _find_type_from_folder_like(link_element: Tag, url: str) -> Optional[IliasElementType]: + """ + Try crawling something that looks like a folder. + """ + # pylint: disable=too-many-return-statements + + found_parent: Optional[Tag] = None + + # We look for the outer div of our inner link, to find information around it + # (mostly the icon) + for parent in link_element.parents: + if "ilContainerListItemOuter" in parent["class"]: + found_parent = parent + break + + if found_parent is None: + # TODO: Log this properly + print(f"Could not find element icon for {url!r}") + return None + + # Find the small descriptive icon to figure out the type + img_tag: Optional[Tag] = found_parent.select_one("img.ilListItemIcon") + + if img_tag is None: + # TODO: Log this properly + print(f"Could not find image tag for {url!r}") + return None + + if "opencast" in str(img_tag["alt"]).lower(): + return IliasElementType.VIDEO_FOLDER + + if str(img_tag["src"]).endswith("icon_exc.svg"): + return IliasElementType.EXERCISE + + if str(img_tag["src"]).endswith("icon_webr.svg"): + return IliasElementType.LINK + + if str(img_tag["src"]).endswith("frm.svg"): + return IliasElementType.FORUM + + if str(img_tag["src"]).endswith("sess.svg"): + return IliasElementType.MEETING + + return IliasElementType.FOLDER + + @staticmethod + def _normalize_meeting_name(meeting_name: str) -> str: 
+ """ + Normalizes meeting names, which have a relative time as their first part, + to their date in ISO format. + """ + date_portion_str = meeting_name.split(" - ")[0] + date_portion = demangle_date(date_portion_str) + + if not date_portion: + return meeting_name + + rest_of_name = meeting_name + if rest_of_name.startswith(date_portion_str): + rest_of_name = rest_of_name[len(date_portion_str):] + + return datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") + rest_of_name + + def _abs_url_from_link(self, link_tag: Tag) -> str: + """ + Create an absolute url from an tag. + """ + return urljoin(self._page_url, link_tag.get("href")) + + +german_months = ['Jan', 'Feb', 'Mär', 'Apr', 'Mai', 'Jun', 'Jul', 'Aug', 'Sep', 'Okt', 'Nov', 'Dez'] +english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + +def demangle_date(date_str: str) -> Optional[datetime]: + """ + Demangle a given date in one of the following formats: + "Gestern, HH:MM" + "Heute, HH:MM" + "Morgen, HH:MM" + "dd. mon yyyy, HH:MM + """ + try: + date_str = re.sub(r"\s+", " ", date_str) + date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) + date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) + date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I) + for german, english in zip(german_months, english_months): + date_str = date_str.replace(german, english) + # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" + date_str = date_str.replace(english + ".", english) + + # We now have a nice english String in the format: "dd. 
mmm yyyy, hh:mm" + day_part, time_part = date_str.split(",") + day_str, month_str, year_str = day_part.split(" ") + + day = int(day_str.strip().replace(".", "")) + month = english_months.index(month_str.strip()) + 1 + year = int(year_str.strip()) + + hour_str, minute_str = time_part.split(":") + hour = int(hour_str) + minute = int(minute_str) + + return datetime(year, month, day, hour, minute) + except Exception: + # TODO: Properly log this + print(f"Could not parse date {date_str!r}") + return None + + +def _format_date_english(date_to_format: date) -> str: + month = english_months[date_to_format.month - 1] + return f"{date_to_format.day:02d}. {month} {date_to_format.year:04d}" + + +def _yesterday() -> date: + return date.today() - timedelta(days=1) + + +def _tomorrow() -> date: + return date.today() + timedelta(days=1) + + +def _sanitize_path_name(name: str) -> str: + return name.replace("/", "-").replace("\\", "-").strip() diff --git a/PFERD/crawlers/ilias.py b/PFERD/crawlers/ilias/kit_web_ilias_crawler.py similarity index 51% rename from PFERD/crawlers/ilias.py rename to PFERD/crawlers/ilias/kit_web_ilias_crawler.py index be3584c..be613e6 100644 --- a/PFERD/crawlers/ilias.py +++ b/PFERD/crawlers/ilias/kit_web_ilias_crawler.py @@ -1,23 +1,19 @@ import asyncio -import json import re -from dataclasses import dataclass -from datetime import date, datetime, timedelta -from enum import Enum from pathlib import PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import Any, Dict, List, Optional, Set, Union -from urllib.parse import parse_qs, urlencode, urljoin, urlparse, urlsplit, urlunsplit +from typing import Any, Dict, Optional, Set, Union import aiohttp from bs4 import BeautifulSoup, Tag +from PFERD.authenticators import Authenticator +from PFERD.config import Config +from PFERD.crawler import CrawlerSection, HttpCrawler, anoncritical, arepeat from PFERD.output_dir import Redownload -from PFERD.utils import soupify +from 
PFERD.utils import soupify, url_set_query_param -from ..authenticators import Authenticator -from ..config import Config -from ..crawler import CrawlerSection, HttpCrawler, anoncritical, arepeat +from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement TargetType = Union[str, int] @@ -58,465 +54,6 @@ class KitIliasCrawlerSection(CrawlerSection): return self.s.getboolean("link_file_plain_text", fallback=False) -class IliasElementType(Enum): - EXERCISE = "exercise" - FILE = "file" - FOLDER = "folder" - FORUM = "forum" - LINK = "link" - MEETING = "meeting" - VIDEO = "video" - VIDEO_PLAYER = "video_player" - VIDEO_FOLDER = "video_folder" - VIDEO_FOLDER_MAYBE_PAGINATED = "video_folder_maybe_paginated" - - -@dataclass -class IliasPageElement: - type: IliasElementType - url: str - name: str - mtime: Optional[datetime] = None - description: Optional[str] = None - - -class IliasPage: - - def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): - self._soup = soup - self._page_url = _page_url - self._page_type = source_element.type if source_element else None - self._source_name = source_element.name if source_element else "" - - def get_child_elements(self) -> List[IliasPageElement]: - """ - Return all child page elements you can find here. 
- """ - if self._is_video_player(): - return self._player_to_video() - if self._is_video_listing(): - return self._find_video_entries() - if self._is_exercise_file(): - return self._find_exercise_entries() - return self._find_normal_entries() - - def _is_video_player(self) -> bool: - return "paella_config_file" in str(self._soup) - - def _is_video_listing(self) -> bool: - # ILIAS fluff around it - if self._soup.find(id="headerimage"): - element: Tag = self._soup.find(id="headerimage") - if "opencast" in element.attrs["src"].lower(): - return True - - # Raw listing without ILIAS fluff - video_element_table: Tag = self._soup.find( - name="table", id=re.compile(r"tbl_xoct_.+") - ) - return video_element_table is not None - - def _is_exercise_file(self) -> bool: - # we know it from before - if self._page_type == IliasElementType.EXERCISE: - return True - - # We have no suitable parent - let's guesss - if self._soup.find(id="headerimage"): - element: Tag = self._soup.find(id="headerimage") - if "exc" in element.attrs["src"].lower(): - return True - - return False - - def _player_to_video(self) -> List[IliasPageElement]: - # Fetch the actual video page. This is a small wrapper page initializing a javscript - # player. Sadly we can not execute that JS. The actual video stream url is nowhere - # on the page, but defined in a JS object inside a script tag, passed to the player - # library. - # We do the impossible and RegEx the stream JSON object out of the page's HTML source - regex: re.Pattern[str] = re.compile( - r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE - ) - json_match = regex.search(str(self._soup)) - - if json_match is None: - print(f"Could not find json stream info for {self._page_url!r}") - return [] - json_str = json_match.group(1) - - # parse it - json_object = json.loads(json_str) - # and fetch the video url! 
- video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"] - return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] - - def _find_video_entries(self) -> List[IliasPageElement]: - # ILIAS has three stages for video pages - # 1. The initial dummy page without any videos. This page contains the link to the listing - # 2. The video listing which might be paginated - # 3. An unpaginated video listing (or at least one that includes 800 videos) - # - # We need to figure out where we are. - - video_element_table: Tag = self._soup.find( - name="table", id=re.compile(r"tbl_xoct_.+") - ) - - if video_element_table is None: - # We are in stage 1 - # The page is actually emtpy but contains the link to stage 2 - content_link: Tag = self._soup.select_one("#tab_series a") - url: str = self._abs_url_from_link(content_link) - query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} - url = _url_set_query_params(url, query_params) - return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] - - is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None - - if is_paginated and not self._page_type == IliasElementType.VIDEO_FOLDER: - # We are in stage 2 - try to break pagination - return self._find_video_entries_paginated() - - return self._find_video_entries_no_paging() - - def _find_video_entries_paginated(self) -> List[IliasPageElement]: - table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) - - if table_element is None: - # TODO: Properly log this - print( - "Could not increase elements per page (table not found)." - " Some might not be crawled!" - ) - return self._find_video_entries_no_paging() - - id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"]) - if id_match is None: - # TODO: Properly log this - print( - "Could not increase elements per page (table id not found)." - " Some might not be crawled!" 
- ) - return self._find_video_entries_no_paging() - - table_id = id_match.group(1) - - query_params = {f"tbl_xoct_{table_id}_trows": "800", - "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} - url = _url_set_query_params(self._page_url, query_params) - return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")] - - def _find_video_entries_no_paging(self) -> List[IliasPageElement]: - """ - Crawls the "second stage" video page. This page contains the actual video urls. - """ - # Video start links are marked with an "Abspielen" link - video_links: List[Tag] = self._soup.findAll( - name="a", text=re.compile(r"\s*Abspielen\s*") - ) - - results: List[IliasPageElement] = [] - - # TODO: Sadly the download button is currently broken, so never do that - for link in video_links: - results.append(self._listed_video_to_element(link)) - - return results - - def _listed_video_to_element(self, link: Tag) -> IliasPageElement: - # The link is part of a table with multiple columns, describing metadata. 
- # 6th child (1 indexed) is the modification time string - modification_string = link.parent.parent.parent.select_one( - "td.std:nth-child(6)" - ).getText().strip() - modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") - - title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip() - title += ".mp4" - - video_name: str = _sanitize_path_name(title) - - video_url = self._abs_url_from_link(link) - - return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) - - def _find_exercise_entries(self) -> List[IliasPageElement]: - results: List[IliasPageElement] = [] - - # Each assignment is in an accordion container - assignment_containers: List[Tag] = self._soup.select(".il_VAccordionInnerContainer") - - for container in assignment_containers: - # Fetch the container name out of the header to use it in the path - container_name = container.select_one(".ilAssignmentHeader").getText().strip() - # Find all download links in the container (this will contain all the files) - files: List[Tag] = container.findAll( - name="a", - # download links contain the given command class - attrs={"href": lambda x: x and "cmdClass=ilexsubmissiongui" in x}, - text="Download" - ) - - # Grab each file as you now have the link - for file_link in files: - # Two divs, side by side. 
Left is the name, right is the link ==> get left - # sibling - file_name = file_link.parent.findPrevious(name="div").getText().strip() - file_name = _sanitize_path_name(file_name) - url = self._abs_url_from_link(file_link) - - results.append(IliasPageElement( - IliasElementType.FILE, - url, - container_name + "/" + file_name, - None # We do not have any timestamp - )) - - return results - - def _find_normal_entries(self) -> List[IliasPageElement]: - result: List[IliasPageElement] = [] - - # Fetch all links and throw them to the general interpreter - links: List[Tag] = self._soup.select("a.il_ContainerItemTitle") - - for link in links: - abs_url = self._abs_url_from_link(link) - element_name = _sanitize_path_name(link.getText()) - element_type = self._find_type_from_link(element_name, link, abs_url) - description = self._find_link_description(link) - - if not element_type: - continue - if element_type == IliasElementType.MEETING: - element_name = _sanitize_path_name(self._normalize_meeting_name(element_name)) - elif element_type == IliasElementType.FILE: - result.append(self._file_to_element(element_name, abs_url, link)) - continue - - result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) - - return result - - def _find_link_description(self, link: Tag) -> Optional[str]: - tile: Tag = link.findParent("div", {"class": lambda x: x and "il_ContainerListItem" in x}) - if not tile: - return None - description_element: Tag = tile.find("div", {"class": lambda x: x and "il_Description" in x}) - if not description_element: - return None - return description_element.getText().strip() - - def _file_to_element(self, name: str, url: str, link_element: Tag) -> IliasPageElement: - # Files have a list of properties (type, modification date, size, etc.) - # In a series of divs. 
- # Find the parent containing all those divs, so we can filter our what we need - properties_parent: Tag = link_element.findParent( - "div", {"class": lambda x: "il_ContainerListItem" in x} - ).select_one(".il_ItemProperties") - # The first one is always the filetype - file_type = properties_parent.select_one("span.il_ItemProperty").getText().strip() - - # The rest does not have a stable order. Grab the whole text and reg-ex the date - # out of it - all_properties_text = properties_parent.getText().strip() - modification_date_match = re.search( - r"(((\d+\. \w+ \d+)|(Gestern|Yesterday)|(Heute|Today)|(Morgen|Tomorrow)), \d+:\d+)", - all_properties_text - ) - if modification_date_match is None: - modification_date = None - # TODO: Properly log this - print(f"Could not extract start date from {all_properties_text!r}") - else: - modification_date_str = modification_date_match.group(1) - modification_date = demangle_date(modification_date_str) - - # Grab the name from the link text - name = _sanitize_path_name(link_element.getText()) - full_path = name + "." + file_type - - return IliasPageElement(IliasElementType.FILE, url, full_path, modification_date) - - @staticmethod - def _find_type_from_link( - element_name: str, - link_element: Tag, - url: str - ) -> Optional[IliasElementType]: - """ - Decides which sub crawler to use for a given top level element. 
- """ - parsed_url = urlparse(url) - - # file URLs contain "target=file" - if "target=file_" in parsed_url.query: - return IliasElementType.FILE - - # Skip forums - if "cmd=showThreads" in parsed_url.query: - return IliasElementType.FORUM - - # Everything with a ref_id can *probably* be opened to reveal nested things - # video groups, directories, exercises, etc - if "ref_id=" in parsed_url.query: - return IliasPage._find_type_from_folder_like(link_element, url) - - # TODO: Log this properly - print(f"Unknown type: The element was at {str(element_name)!r} and it is {link_element!r})") - return None - - @staticmethod - def _find_type_from_folder_like(link_element: Tag, url: str) -> Optional[IliasElementType]: - """ - Try crawling something that looks like a folder. - """ - # pylint: disable=too-many-return-statements - - found_parent: Optional[Tag] = None - - # We look for the outer div of our inner link, to find information around it - # (mostly the icon) - for parent in link_element.parents: - if "ilContainerListItemOuter" in parent["class"]: - found_parent = parent - break - - if found_parent is None: - # TODO: Log this properly - print(f"Could not find element icon for {url!r}") - return None - - # Find the small descriptive icon to figure out the type - img_tag: Optional[Tag] = found_parent.select_one("img.ilListItemIcon") - - if img_tag is None: - # TODO: Log this properly - print(f"Could not find image tag for {url!r}") - return None - - if "opencast" in str(img_tag["alt"]).lower(): - return IliasElementType.VIDEO_FOLDER - - if str(img_tag["src"]).endswith("icon_exc.svg"): - return IliasElementType.EXERCISE - - if str(img_tag["src"]).endswith("icon_webr.svg"): - return IliasElementType.LINK - - if str(img_tag["src"]).endswith("frm.svg"): - return IliasElementType.FORUM - - if str(img_tag["src"]).endswith("sess.svg"): - return IliasElementType.MEETING - - return IliasElementType.FOLDER - - @staticmethod - def _normalize_meeting_name(meeting_name: str) -> str: 
- """ - Normalizes meeting names, which have a relative time as their first part, - to their date in ISO format. - """ - date_portion_str = meeting_name.split(" - ")[0] - date_portion = demangle_date(date_portion_str) - - if not date_portion: - return meeting_name - - rest_of_name = meeting_name - if rest_of_name.startswith(date_portion_str): - rest_of_name = rest_of_name[len(date_portion_str):] - - return datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") + rest_of_name - - def _abs_url_from_link(self, link_tag: Tag) -> str: - """ - Create an absolute url from an tag. - """ - return urljoin(self._page_url, link_tag.get("href")) - - -german_months = ['Jan', 'Feb', 'Mär', 'Apr', 'Mai', 'Jun', 'Jul', 'Aug', 'Sep', 'Okt', 'Nov', 'Dez'] -english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - - -def demangle_date(date_str: str) -> Optional[datetime]: - """ - Demangle a given date in one of the following formats: - "Gestern, HH:MM" - "Heute, HH:MM" - "Morgen, HH:MM" - "dd. mon yyyy, HH:MM - """ - try: - date_str = re.sub(r"\s+", " ", date_str) - date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) - date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) - date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I) - for german, english in zip(german_months, english_months): - date_str = date_str.replace(german, english) - # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" - date_str = date_str.replace(english + ".", english) - - # We now have a nice english String in the format: "dd. 
mmm yyyy, hh:mm" - day_part, time_part = date_str.split(",") - day_str, month_str, year_str = day_part.split(" ") - - day = int(day_str.strip().replace(".", "")) - month = english_months.index(month_str.strip()) + 1 - year = int(year_str.strip()) - - hour_str, minute_str = time_part.split(":") - hour = int(hour_str) - minute = int(minute_str) - - return datetime(year, month, day, hour, minute) - except Exception: - # TODO: Properly log this - print(f"Could not parse date {date_str!r}") - return None - - -def _format_date_english(date_to_format: date) -> str: - month = english_months[date_to_format.month - 1] - return f"{date_to_format.day:02d}. {month} {date_to_format.year:04d}" - - -def _yesterday() -> date: - return date.today() - timedelta(days=1) - - -def _tomorrow() -> date: - return date.today() + timedelta(days=1) - - -def _sanitize_path_name(name: str) -> str: - return name.replace("/", "-").replace("\\", "-").strip() - - -def _url_set_query_param(url: str, param: str, value: str) -> str: - """ - Set a query parameter in an url, overwriting existing ones with the same name. 
- """ - scheme, netloc, path, query, fragment = urlsplit(url) - query_parameters = parse_qs(query) - query_parameters[param] = [value] - new_query_string = urlencode(query_parameters, doseq=True) - - return urlunsplit((scheme, netloc, path, new_query_string, fragment)) - - -def _url_set_query_params(url: str, params: Dict[str, str]) -> str: - result = url - - for key, val in params.items(): - result = _url_set_query_param(result, key, val) - - return result - - _DIRECTORY_PAGES: Set[IliasElementType] = set([ IliasElementType.EXERCISE, IliasElementType.FOLDER, @@ -559,7 +96,7 @@ class KitIliasCrawler(HttpCrawler): async def _crawl_course(self, course_id: int) -> None: # Start crawling at the given course - root_url = _url_set_query_param( + root_url = url_set_query_param( self._base_url + "/goto.php", "target", f"crs_{course_id}" ) diff --git a/PFERD/utils.py b/PFERD/utils.py index 0b3d40d..56d6f53 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -6,7 +6,8 @@ import sys from abc import ABC, abstractmethod from contextlib import AsyncExitStack from types import TracebackType -from typing import Any, Callable, Generic, Optional, Type, TypeVar +from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar +from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit import bs4 @@ -38,6 +39,30 @@ def soupify(data: bytes) -> bs4.BeautifulSoup: return bs4.BeautifulSoup(data, "html.parser") +def url_set_query_param(url: str, param: str, value: str) -> str: + """ + Set a query parameter in an url, overwriting existing ones with the same name. + """ + scheme, netloc, path, query, fragment = urlsplit(url) + query_parameters = parse_qs(query) + query_parameters[param] = [value] + new_query_string = urlencode(query_parameters, doseq=True) + + return urlunsplit((scheme, netloc, path, new_query_string, fragment)) + + +def url_set_query_params(url: str, params: Dict[str, str]) -> str: + """ + Sets multiple query parameters in an url, overwriting existing ones. 
+ """ + result = url + + for key, val in params.items(): + result = url_set_query_param(result, key, val) + + return result + + async def prompt_yes_no(query: str, default: Optional[bool]) -> bool: """ Asks the user a yes/no question and returns their choice. From 2976b4d352ac86f718d95c8a193a8bc198615b6b Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 May 2021 21:37:10 +0200 Subject: [PATCH 154/524] Move ILIAS file templates to own file --- PFERD/crawlers/ilias/file_templates.py | 87 ++++++++++++++++++ PFERD/crawlers/ilias/kit_web_ilias_crawler.py | 91 +------------------ 2 files changed, 89 insertions(+), 89 deletions(-) create mode 100644 PFERD/crawlers/ilias/file_templates.py diff --git a/PFERD/crawlers/ilias/file_templates.py b/PFERD/crawlers/ilias/file_templates.py new file mode 100644 index 0000000..e9e332e --- /dev/null +++ b/PFERD/crawlers/ilias/file_templates.py @@ -0,0 +1,87 @@ +link_template_plain = "{{link}}" +link_template_rich = """ + + + + + ILIAS - Link: {{name}} + + + + + +
+ +
+ +
{{description}}
+
+ +
+ + +""" # noqa: E501 line too long diff --git a/PFERD/crawlers/ilias/kit_web_ilias_crawler.py b/PFERD/crawlers/ilias/kit_web_ilias_crawler.py index be613e6..46eb662 100644 --- a/PFERD/crawlers/ilias/kit_web_ilias_crawler.py +++ b/PFERD/crawlers/ilias/kit_web_ilias_crawler.py @@ -13,6 +13,7 @@ from PFERD.crawler import CrawlerSection, HttpCrawler, anoncritical, arepeat from PFERD.output_dir import Redownload from PFERD.utils import soupify, url_set_query_param +from .file_templates import link_template_plain, link_template_rich from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement TargetType = Union[str, int] @@ -176,7 +177,7 @@ class KitIliasCrawler(HttpCrawler): real_url: str = html_page.select_one("a").get("href").strip() async with dl as sink: - content = _link_template_plain if self._link_file_use_plaintext else _link_template_rich + content = link_template_plain if self._link_file_use_plaintext else link_template_rich content = content.replace("{{link}}", real_url) content = content.replace("{{name}}", element.name) content = content.replace("{{description}}", str(element.description)) @@ -366,91 +367,3 @@ class KitShibbolethLogin: async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: async with session.post(url, data=data) as response: return soupify(await response.read()) - -_link_template_plain = "{{link}}" -_link_template_rich = """ - - - - - ILIAS - Link: {{name}} - - - - - -
- -
-
- {{name}} -
-
{{description}}
-
- -
- - -""" # noqa: E501 line too long From 81301f3a76f741cb8f6db5aae75e1bb146cead5b Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 May 2021 21:41:17 +0200 Subject: [PATCH 155/524] Rename the ilias crawler to ilias web crawler --- PFERD/crawlers/__init__.py | 6 +++--- PFERD/crawlers/ilias/__init__.py | 4 ++-- .../{kit_web_ilias_crawler.py => kit_ilias_web_crawler.py} | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) rename PFERD/crawlers/ilias/{kit_web_ilias_crawler.py => kit_ilias_web_crawler.py} (99%) diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawlers/__init__.py index 72d6798..dc7dfa0 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawlers/__init__.py @@ -4,7 +4,7 @@ from typing import Callable, Dict from ..authenticator import Authenticator from ..config import Config from ..crawler import Crawler -from .ilias import KitIliasCrawler, KitIliasCrawlerSection +from .ilias import KitIliasWebCrawler, KitIliasWebCrawlerSection from .local import LocalCrawler, LocalCrawlerSection CrawlerConstructor = Callable[[ @@ -17,6 +17,6 @@ CrawlerConstructor = Callable[[ CRAWLERS: Dict[str, CrawlerConstructor] = { "local": lambda n, s, c, a: LocalCrawler(n, LocalCrawlerSection(s), c), - "kit-ilias": lambda n, s, c, a: - KitIliasCrawler(n, KitIliasCrawlerSection(s), c, a), + "kit-ilias-web": lambda n, s, c, a: + KitIliasWebCrawler(n, KitIliasWebCrawlerSection(s), c, a), } diff --git a/PFERD/crawlers/ilias/__init__.py b/PFERD/crawlers/ilias/__init__.py index 15b8d5d..26618a8 100644 --- a/PFERD/crawlers/ilias/__init__.py +++ b/PFERD/crawlers/ilias/__init__.py @@ -1,3 +1,3 @@ -from .kit_web_ilias_crawler import KitIliasCrawler, KitIliasCrawlerSection +from .kit_ilias_web_crawler import KitIliasWebCrawler, KitIliasWebCrawlerSection -__all__ = ["KitIliasCrawler", "KitIliasCrawlerSection"] +__all__ = ["KitIliasWebCrawler", "KitIliasWebCrawlerSection"] diff --git a/PFERD/crawlers/ilias/kit_web_ilias_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py 
similarity index 99% rename from PFERD/crawlers/ilias/kit_web_ilias_crawler.py rename to PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 46eb662..9c7793c 100644 --- a/PFERD/crawlers/ilias/kit_web_ilias_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -19,7 +19,7 @@ from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement TargetType = Union[str, int] -class KitIliasCrawlerSection(CrawlerSection): +class KitIliasWebCrawlerSection(CrawlerSection): def target(self) -> TargetType: target = self.s.get("target") @@ -64,11 +64,11 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([ ]) -class KitIliasCrawler(HttpCrawler): +class KitIliasWebCrawler(HttpCrawler): def __init__( self, name: str, - section: KitIliasCrawlerSection, + section: KitIliasWebCrawlerSection, config: Config, authenticators: Dict[str, Authenticator] ): From 8cfa818f04e97713ffd15f9a39e07728211042d8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 May 2021 21:57:55 +0200 Subject: [PATCH 156/524] Only call should_crawl once --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 9c7793c..82ca8d7 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -130,6 +130,12 @@ class KitIliasWebCrawler(HttpCrawler): @arepeat(3) @anoncritical async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: + # We might not want to crawl this directory-ish page. 
+ # This is not in #handle_element, as the download methods check it themselves and therefore + # would perform this check twice - messing with the explain output + if not self.should_crawl(path): + return + tasks = [] async with self.crawl_bar(path): soup = await self._get_page(url) From e4f9560655b2bd8f56a77f0b126d14b1db61b52c Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 May 2021 22:01:09 +0200 Subject: [PATCH 157/524] Only retry on aiohttp errors in ILIAS crawler This patch removes quite a few retries and now only retries the ilias element method. Every other HTTP-interacting method (except for the root requests) is called from there and should be covered. In the future we also want to retry the root a few times, but that will be done after the download sink API is adjusted. --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 82ca8d7..a025127 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -2,14 +2,14 @@ import asyncio import re from pathlib import PurePath # TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import Any, Dict, Optional, Set, Union +from typing import Any, Awaitable, Callable, Dict, Optional, Set, TypeVar, Union import aiohttp from bs4 import BeautifulSoup, Tag from PFERD.authenticators import Authenticator from PFERD.config import Config -from PFERD.crawler import CrawlerSection, HttpCrawler, anoncritical, arepeat +from PFERD.crawler import CrawlerSection, CrawlWarning, HttpCrawler, anoncritical from PFERD.output_dir import Redownload from PFERD.utils import soupify, url_set_query_param @@ -63,6 +63,29 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([ IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, ]) +AWrapped = TypeVar("AWrapped", bound=Callable[..., 
Awaitable[None]]) + + +def _iorepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: + def decorator(f: AWrapped) -> AWrapped: + async def wrapper(self: "HttpCrawler", *args: Any, **kwargs: Any) -> None: + for _ in range(attempts - 1): + try: + await f(self, *args, **kwargs) + return + except aiohttp.ContentTypeError: # invalid content type + raise CrawlWarning("ILIAS returned an invalid content type") + except aiohttp.TooManyRedirects: + raise CrawlWarning("Got stuck in a redirect loop") + except aiohttp.ClientPayloadError: # encoding or not enough bytes + pass + except aiohttp.ClientConnectionError: # e.g. timeout, disconnect, resolve failed, etc. + pass + + await f(self, *args, **kwargs) + return wrapper # type: ignore + return decorator + class KitIliasWebCrawler(HttpCrawler): def __init__( @@ -106,7 +129,6 @@ class KitIliasWebCrawler(HttpCrawler): async def _crawl_desktop(self) -> None: await self._crawl_url(self._base_url) - @arepeat(3) async def _crawl_url(self, url: str, expected_id: Optional[int] = None) -> None: tasks = [] @@ -127,8 +149,6 @@ class KitIliasWebCrawler(HttpCrawler): await asyncio.gather(*tasks) - @arepeat(3) - @anoncritical async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: # We might not want to crawl this directory-ish page. 
# This is not in #handle_element, as the download methods check it themselves and therefore @@ -147,12 +167,10 @@ class KitIliasWebCrawler(HttpCrawler): await asyncio.gather(*tasks) @anoncritical + @_iorepeat(3) async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: element_path = PurePath(parent_path, element.name) - if not self.should_crawl(element_path): - return - if element.type == IliasElementType.FILE: await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: @@ -170,7 +188,6 @@ class KitIliasWebCrawler(HttpCrawler): # TODO: Proper exception raise RuntimeError(f"Unknown type: {element.type!r}") - @arepeat(3) async def _download_link(self, element: IliasPageElement, element_path: PurePath) -> None: dl = await self.download(element_path, mtime=element.mtime) if not dl: @@ -191,7 +208,6 @@ class KitIliasWebCrawler(HttpCrawler): sink.file.write(content.encode("utf-8")) sink.done() - @arepeat(3) async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: # Videos will NOT be redownloaded - their content doesn't really change and they are chunky dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) @@ -212,7 +228,6 @@ class KitIliasWebCrawler(HttpCrawler): sink.done() - @arepeat(3) async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: dl = await self.download(element_path, mtime=element.mtime) if not dl: From 83d12fcf2d75650033154c77926728798a4bb541 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 20 May 2021 14:58:54 +0200 Subject: [PATCH 158/524] Add some explains to ilias crawler and use crawler exceptions --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index a025127..88732c0 100644 --- 
a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -6,10 +6,12 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Set, TypeVar, Union import aiohttp from bs4 import BeautifulSoup, Tag +from rich.markup import escape from PFERD.authenticators import Authenticator from PFERD.config import Config -from PFERD.crawler import CrawlerSection, CrawlWarning, HttpCrawler, anoncritical +from PFERD.crawler import CrawlError, CrawlerSection, CrawlWarning, HttpCrawler, anoncritical +from PFERD.logging import log from PFERD.output_dir import Redownload from PFERD.utils import soupify, url_set_query_param @@ -66,10 +68,11 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([ AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) -def _iorepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: +def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: def decorator(f: AWrapped) -> AWrapped: async def wrapper(self: "HttpCrawler", *args: Any, **kwargs: Any) -> None: - for _ in range(attempts - 1): + last_exception: Optional[BaseException] = None + for round in range(attempts): try: await f(self, *args, **kwargs) return @@ -77,12 +80,17 @@ def _iorepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: raise CrawlWarning("ILIAS returned an invalid content type") except aiohttp.TooManyRedirects: raise CrawlWarning("Got stuck in a redirect loop") - except aiohttp.ClientPayloadError: # encoding or not enough bytes - pass - except aiohttp.ClientConnectionError: # e.g. timeout, disconnect, resolve failed, etc. - pass + except aiohttp.ClientPayloadError as e: # encoding or not enough bytes + last_exception = e + except aiohttp.ClientConnectionError as e: # e.g. timeout, disconnect, resolve failed, etc. + last_exception = e + log.explain_topic(f"Retrying operation {escape(name)}. 
Retries left: {attempts - 1 - round}") + + if last_exception: + message = f"Error in I/O Operation: {escape(str(last_exception))}" + raise CrawlWarning(message) from last_exception + raise CrawlError("Impossible return in ilias _iorepeat") - await f(self, *args, **kwargs) return wrapper # type: ignore return decorator @@ -109,14 +117,19 @@ class KitIliasWebCrawler(HttpCrawler): async def crawl(self) -> None: if isinstance(self._target, int): + log.explain_topic(f"Inferred crawl target: Course with id {self._target}") await self._crawl_course(self._target) elif self._target == "desktop": + log.explain_topic("Inferred crawl target: Personal desktop") await self._crawl_desktop() else: + log.explain_topic(f"Inferred crawl target: URL {escape(self._target)}") await self._crawl_url(self._target) if self.error_free: await self.cleanup() + else: + log.explain_topic("Skipping file cleanup as errors occurred earlier") async def _crawl_course(self, course_id: int) -> None: # Start crawling at the given course @@ -132,15 +145,16 @@ class KitIliasWebCrawler(HttpCrawler): async def _crawl_url(self, url: str, expected_id: Optional[int] = None) -> None: tasks = [] + # TODO: Retry this when the crawl and download bar are reworked async with self.crawl_bar(PurePath("Root element")): soup = await self._get_page(url) if expected_id is not None: perma_link_element: Tag = soup.find(id="current_perma_link") if not perma_link_element or "crs_" not in perma_link_element.get("value"): - # TODO: Properly handle error - raise RuntimeError( - "Invalid course id? I didn't find anything looking like a course!") + raise CrawlError( + "Invalid course id? I didn't find anything looking like a course" + ) # Duplicated code, but the root page is special - we want to void fetching it twice! 
page = IliasPage(soup, url, None) @@ -167,15 +181,14 @@ class KitIliasWebCrawler(HttpCrawler): await asyncio.gather(*tasks) @anoncritical - @_iorepeat(3) + @_iorepeat(3, "ILIAS element crawling") async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: element_path = PurePath(parent_path, element.name) if element.type == IliasElementType.FILE: await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: - # TODO: Delete - print(f"Skipping forum [green]{element_path}[/]") + log.explain_topic(f"Skipping forum at {escape(str(element_path))}") elif element.type == IliasElementType.LINK: await self._download_link(element, element_path) elif element.type == IliasElementType.VIDEO: @@ -185,8 +198,9 @@ class KitIliasWebCrawler(HttpCrawler): elif element.type in _DIRECTORY_PAGES: await self._handle_ilias_page(element.url, element, element_path) else: - # TODO: Proper exception - raise RuntimeError(f"Unknown type: {element.type!r}") + # This will retry it a few times, failing everytime. It doesn't make any network + # requests, so that's fine. + raise CrawlWarning(f"Unknown element type: {element.type!r}") async def _download_link(self, element: IliasPageElement, element_path: PurePath) -> None: dl = await self.download(element_path, mtime=element.mtime) From 4b104b6252cb5ee97481c0842564922757482f85 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 21 May 2021 12:02:51 +0200 Subject: [PATCH 159/524] Try out some HTTP authentication handling This is by no means final yet and will change a bit once the dl and cl are changed, but it might serve as a first try. It is also wholly untested. 
--- PFERD/crawler.py | 28 +++++++++ PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 63 ++++++++++++------- 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 80ecedb..2f8e5ad 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -1,3 +1,4 @@ +import asyncio from abc import ABC, abstractmethod from contextlib import asynccontextmanager from datetime import datetime @@ -265,6 +266,33 @@ class HttpCrawler(Crawler): self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE) self._output_dir.register_reserved(self.COOKIE_FILE) + self._authentication_id = 0 + self._authentication_lock = asyncio.Lock() + + async def prepare_request(self) -> int: + # We acquire the lock here to ensure we wait for any concurrent authenticate to finish. + # This should reduce the amount of requests we make: If an authentication is in progress + # all future requests wait for authentication to complete. + async with self._authentication_lock: + return self._authentication_id + + async def authenticate(self, current_id: int) -> None: + async with self._authentication_lock: + # Another thread successfully called authenticate in between + # We do not want to perform auth again, so return here. We can + # assume auth suceeded as authenticate will throw an error if + # it failed. + if current_id != self._authentication_id: + return + await self._authenticate() + self._authentication_id += 1 + + async def _authenticate(self) -> None: + """ + Performs authentication. This method must only return normally if authentication suceeded. + In all other cases it mus either retry internally or throw a terminal exception. 
+ """ + raise RuntimeError("_authenticate() was called but crawler doesn't provide an implementation") async def run(self) -> None: cookie_jar = aiohttp.CookieJar() diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 88732c0..0ca6565 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -5,14 +5,15 @@ from pathlib import PurePath from typing import Any, Awaitable, Callable, Dict, Optional, Set, TypeVar, Union import aiohttp +from aiohttp import hdrs from bs4 import BeautifulSoup, Tag from rich.markup import escape from PFERD.authenticators import Authenticator from PFERD.config import Config from PFERD.crawler import CrawlError, CrawlerSection, CrawlWarning, HttpCrawler, anoncritical -from PFERD.logging import log -from PFERD.output_dir import Redownload +from PFERD.logging import ProgressBar, log +from PFERD.output_dir import FileSink, Redownload from PFERD.utils import soupify, url_set_query_param from .file_templates import link_template_plain, link_template_rich @@ -232,23 +233,24 @@ class KitIliasWebCrawler(HttpCrawler): page = IliasPage(await self._get_page(element.url), element.url, element) real_element = page.get_child_elements()[0] - async with dl as sink, self.session.get(real_element.url) as resp: - if resp.content_length: - bar.set_total(resp.content_length) - - async for data in resp.content.iter_chunked(1024): - sink.file.write(data) - bar.advance(len(data)) - - sink.done() + async with dl as sink: + await self._stream_from_url(real_element.url, sink, bar) async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: dl = await self.download(element_path, mtime=element.mtime) if not dl: return - async with self.download_bar(element_path) as bar: - async with dl as sink, self.session.get(element.url) as resp: + async with self.download_bar(element_path) as bar, dl as sink: + await 
self._stream_from_url(element.url, sink, bar) + + async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: + async def try_stream() -> bool: + async with self.session.get(url, allow_redirects=False) as resp: + # Redirect means we weren't authenticated + if hdrs.LOCATION in resp.headers: + return False + if resp.content_length: bar.set_total(resp.content_length) @@ -257,22 +259,39 @@ class KitIliasWebCrawler(HttpCrawler): bar.advance(len(data)) sink.done() + return True - async def _get_page(self, url: str, retries_left: int = 3) -> BeautifulSoup: - # This function will retry itself a few times if it is not logged in - it won't handle - # connection errors - if retries_left < 0: - # TODO: Proper exception - raise RuntimeError("Get page failed too often") - print(url, "retries left", retries_left) + auth_id = await self.prepare_request() + if await try_stream(): + return + + await self.authenticate(auth_id) + + if not await try_stream(): + raise CrawlError("File streaming failed after authenticate()") + + async def _get_page(self, url: str) -> BeautifulSoup: + auth_id = await self.prepare_request() async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): return soup - await self._shibboleth_login.login(self.session) + # We weren't authenticated, so try to do that + await self.authenticate(auth_id) - return await self._get_page(url, retries_left - 1) + # Retry once after authenticating. If this fails, we will die. + async with self.session.get(url) as request: + soup = soupify(await request.read()) + if self._is_logged_in(soup): + return soup + raise CrawlError("get_page failed even after authenticating") + + # We repeat this as the login method in shibboleth doesn't handle I/O errors. + # Shibboleth is quite reliable as well, the repeat is likely not critical here. 
+ @_iorepeat(3, "Login") + async def _authenticate(self) -> None: + await self._shibboleth_login.login(self.session) @staticmethod def _is_logged_in(soup: BeautifulSoup) -> bool: From 98b8ca31faafbc5b27aa6eaa397a6610c2c43f31 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 14:45:32 +0200 Subject: [PATCH 160/524] Add some todos --- PFERD/__main__.py | 6 +++++- PFERD/crawler.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index c03e08c..69feb81 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -266,4 +266,8 @@ def main() -> None: exit() pferd = Pferd(config) - asyncio.run(pferd.run()) + try: + asyncio.run(pferd.run()) + except KeyboardInterrupt: + # TODO Clean up tmp files + pass diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 2f8e5ad..2785e41 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -49,6 +49,7 @@ def noncritical(f: Wrapped) -> Wrapped: log.print(f"[bold bright_red]Warning[/] {escape(str(e))}") crawler.error_free = False except CrawlError as e: + # TODO Don't print error, just pass it on upwards log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}") crawler.error_free = False raise From b5785f260ed3f1543e95b411b8bc5e6d14b316ae Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 15:03:45 +0200 Subject: [PATCH 161/524] Extract CLI argument parsing to separate module --- PFERD/__main__.py | 189 +----------------------------------------- PFERD/cli/__init__.py | 125 ++++++++++++++++++++++++++++ PFERD/cli/local.py | 67 +++++++++++++++ 3 files changed, 196 insertions(+), 185 deletions(-) create mode 100644 PFERD/cli/__init__.py create mode 100644 PFERD/cli/local.py diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 69feb81..9c60c63 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -3,193 +3,12 @@ import asyncio import configparser from pathlib import Path +from .cli import PARSER, load_default_section from .config import Config, 
ConfigDumpException, ConfigLoadException from .logging import log -from .output_dir import OnConflict, Redownload from .pferd import Pferd from .version import NAME, VERSION -GENERAL_PARSER = argparse.ArgumentParser(add_help=False) -GENERAL_PARSER.add_argument( - "--version", - action="store_true", - help="print version and exit" -) -GENERAL_PARSER.add_argument( - "--config", "-c", - type=Path, - metavar="PATH", - help="custom config file" -) -GENERAL_PARSER.add_argument( - "--dump-config", - nargs="?", - const=True, - metavar="PATH", - help="dump current configuration to a file and exit." - " Uses default config file path if no path is specified" -) -GENERAL_PARSER.add_argument( - "--crawler", - action="append", - type=str, - metavar="NAME", - help="only execute a single crawler." - " Can be specified multiple times to execute multiple crawlers" -) -GENERAL_PARSER.add_argument( - "--working-dir", - type=Path, - metavar="PATH", - help="custom working directory" -) -GENERAL_PARSER.add_argument( - "--explain", "-e", - # TODO Use argparse.BooleanOptionalAction after updating to 3.9 - action="store_const", - const=True, - help="log and explain in detail what PFERD is doing" -) - - -def load_general( - args: argparse.Namespace, - parser: configparser.ConfigParser, -) -> None: - section = parser[parser.default_section] - - if args.working_dir is not None: - section["working_dir"] = str(args.working_dir) - if args.explain is not None: - section["explain"] = "true" if args.explain else "false" - - -CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) -CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( - title="general crawler arguments", - description="arguments common to all crawlers", -) -CRAWLER_PARSER_GROUP.add_argument( - "--redownload", - type=Redownload.from_string, - metavar="OPTION", - help="when to redownload a file that's already present locally" -) -CRAWLER_PARSER_GROUP.add_argument( - "--on-conflict", - type=OnConflict.from_string, - 
metavar="OPTION", - help="what to do when local and remote files or directories differ" -) -CRAWLER_PARSER_GROUP.add_argument( - "--transform", "-t", - action="append", - type=str, - metavar="RULE", - help="add a single transformation rule. Can be specified multiple times" -) -CRAWLER_PARSER_GROUP.add_argument( - "--max-concurrent-tasks", - type=int, - metavar="N", - help="maximum number of concurrent tasks (crawling, downloading)" -) -CRAWLER_PARSER_GROUP.add_argument( - "--max-concurrent-downloads", - type=int, - metavar="N", - help="maximum number of tasks that may download data at the same time" -) -CRAWLER_PARSER_GROUP.add_argument( - "--delay-between-tasks", - type=float, - metavar="SECONDS", - help="time the crawler should wait between subsequent tasks" -) - - -def load_crawler( - args: argparse.Namespace, - section: configparser.SectionProxy, -) -> None: - if args.redownload is not None: - section["redownload"] = args.redownload.value - if args.on_conflict is not None: - section["on_conflict"] = args.on_conflict.value - if args.transform is not None: - section["transform"] = "\n" + "\n".join(args.transform) - if args.max_concurrent_tasks is not None: - section["max_concurrent_tasks"] = str(args.max_concurrent_tasks) - if args.max_concurrent_downloads is not None: - section["max_concurrent_downloads"] = str(args.max_concurrent_downloads) - if args.delay_between_tasks is not None: - section["delay_between_tasks"] = str(args.delay_between_tasks) - - -PARSER = argparse.ArgumentParser(parents=[GENERAL_PARSER]) -PARSER.set_defaults(command=None) -SUBPARSERS = PARSER.add_subparsers(title="crawlers") - - -LOCAL_CRAWLER = SUBPARSERS.add_parser( - "local", - parents=[CRAWLER_PARSER], -) -LOCAL_CRAWLER.set_defaults(command="local") -LOCAL_CRAWLER_GROUP = LOCAL_CRAWLER.add_argument_group( - title="local crawler arguments", - description="arguments for the 'local' crawler", -) -LOCAL_CRAWLER_GROUP.add_argument( - "target", - type=Path, - metavar="TARGET", - 
help="directory to crawl" -) -LOCAL_CRAWLER_GROUP.add_argument( - "output", - type=Path, - metavar="OUTPUT", - help="output directory" -) -LOCAL_CRAWLER_GROUP.add_argument( - "--crawl-delay", - type=float, - metavar="SECONDS", - help="artificial delay to simulate for crawl requests" -) -LOCAL_CRAWLER_GROUP.add_argument( - "--download-delay", - type=float, - metavar="SECONDS", - help="artificial delay to simulate for download requests" -) -LOCAL_CRAWLER_GROUP.add_argument( - "--download-speed", - type=int, - metavar="BYTES_PER_SECOND", - help="download speed to simulate" -) - - -def load_local_crawler( - args: argparse.Namespace, - parser: configparser.ConfigParser, -) -> None: - parser["crawl:local"] = {} - section = parser["crawl:local"] - load_crawler(args, section) - - section["type"] = "local" - section["target"] = str(args.target) - section["output_dir"] = str(args.output) - if args.crawl_delay is not None: - section["crawl_delay"] = str(args.crawl_delay) - if args.download_delay is not None: - section["download_delay"] = str(args.download_delay) - if args.download_speed is not None: - section["download_speed"] = str(args.download_speed) - def load_parser( args: argparse.Namespace, @@ -202,10 +21,10 @@ def load_parser( Config.load_parser(parser, path=args.config) else: log.explain(f"CLI command specified, creating config for {args.command!r}") - if args.command == "local": - load_local_crawler(args, parser) + if args.command: + args.command(args, parser) - load_general(args, parser) + load_default_section(args, parser) prune_crawlers(args, parser) return parser diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py new file mode 100644 index 0000000..71d9732 --- /dev/null +++ b/PFERD/cli/__init__.py @@ -0,0 +1,125 @@ +import argparse +import configparser +from pathlib import Path + +from ..output_dir import OnConflict, Redownload + +CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) +CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( + 
title="general crawler arguments", + description="arguments common to all crawlers", +) +CRAWLER_PARSER_GROUP.add_argument( + "--redownload", + type=Redownload.from_string, + metavar="OPTION", + help="when to redownload a file that's already present locally" +) +CRAWLER_PARSER_GROUP.add_argument( + "--on-conflict", + type=OnConflict.from_string, + metavar="OPTION", + help="what to do when local and remote files or directories differ" +) +CRAWLER_PARSER_GROUP.add_argument( + "--transform", "-t", + action="append", + type=str, + metavar="RULE", + help="add a single transformation rule. Can be specified multiple times" +) +CRAWLER_PARSER_GROUP.add_argument( + "--max-concurrent-tasks", + type=int, + metavar="N", + help="maximum number of concurrent tasks (crawling, downloading)" +) +CRAWLER_PARSER_GROUP.add_argument( + "--max-concurrent-downloads", + type=int, + metavar="N", + help="maximum number of tasks that may download data at the same time" +) +CRAWLER_PARSER_GROUP.add_argument( + "--delay-between-tasks", + type=float, + metavar="SECONDS", + help="time the crawler should wait between subsequent tasks" +) + + +def load_crawler( + args: argparse.Namespace, + section: configparser.SectionProxy, +) -> None: + if args.redownload is not None: + section["redownload"] = args.redownload.value + if args.on_conflict is not None: + section["on_conflict"] = args.on_conflict.value + if args.transform is not None: + section["transform"] = "\n" + "\n".join(args.transform) + if args.max_concurrent_tasks is not None: + section["max_concurrent_tasks"] = str(args.max_concurrent_tasks) + if args.max_concurrent_downloads is not None: + section["max_concurrent_downloads"] = str(args.max_concurrent_downloads) + if args.delay_between_tasks is not None: + section["delay_between_tasks"] = str(args.delay_between_tasks) + + +PARSER = argparse.ArgumentParser() +PARSER.set_defaults(command=None) +PARSER.add_argument( + "--version", + action="store_true", + help="print version and exit" +) 
+PARSER.add_argument( + "--config", "-c", + type=Path, + metavar="PATH", + help="custom config file" +) +PARSER.add_argument( + "--dump-config", + nargs="?", + const=True, + metavar="PATH", + help="dump current configuration to a file and exit." + " Uses default config file path if no path is specified" +) +PARSER.add_argument( + "--crawler", + action="append", + type=str, + metavar="NAME", + help="only execute a single crawler." + " Can be specified multiple times to execute multiple crawlers" +) +PARSER.add_argument( + "--working-dir", + type=Path, + metavar="PATH", + help="custom working directory" +) +PARSER.add_argument( + "--explain", "-e", + # TODO Use argparse.BooleanOptionalAction after updating to 3.9 + action="store_const", + const=True, + help="log and explain in detail what PFERD is doing" +) + + +def load_default_section( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + section = parser[parser.default_section] + + if args.working_dir is not None: + section["working_dir"] = str(args.working_dir) + if args.explain is not None: + section["explain"] = "true" if args.explain else "false" + + +SUBPARSERS = PARSER.add_subparsers(title="crawlers") diff --git a/PFERD/cli/local.py b/PFERD/cli/local.py new file mode 100644 index 0000000..5df81db --- /dev/null +++ b/PFERD/cli/local.py @@ -0,0 +1,67 @@ +import argparse +import configparser +from pathlib import Path + +from . 
import CRAWLER_PARSER, SUBPARSERS, load_crawler + +SUBPARSER = SUBPARSERS.add_parser( + "local", + parents=[CRAWLER_PARSER], +) + +GROUP = SUBPARSER.add_argument_group( + title="local crawler arguments", + description="arguments for the 'local' crawler", +) +GROUP.add_argument( + "target", + type=Path, + metavar="TARGET", + help="directory to crawl" +) +GROUP.add_argument( + "output", + type=Path, + metavar="OUTPUT", + help="output directory" +) +GROUP.add_argument( + "--crawl-delay", + type=float, + metavar="SECONDS", + help="artificial delay to simulate for crawl requests" +) +GROUP.add_argument( + "--download-delay", + type=float, + metavar="SECONDS", + help="artificial delay to simulate for download requests" +) +GROUP.add_argument( + "--download-speed", + type=int, + metavar="BYTES_PER_SECOND", + help="download speed to simulate" +) + + +def load( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + parser["crawl:local"] = {} + section = parser["crawl:local"] + load_crawler(args, section) + + section["type"] = "local" + section["target"] = str(args.target) + section["output_dir"] = str(args.output) + if args.crawl_delay is not None: + section["crawl_delay"] = str(args.crawl_delay) + if args.download_delay is not None: + section["download_delay"] = str(args.download_delay) + if args.download_speed is not None: + section["download_speed"] = str(args.download_speed) + + +SUBPARSER.set_defaults(command=load) From 54dd2f8337a36a70c789ee7f3aa397677b565244 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 16:47:24 +0200 Subject: [PATCH 162/524] Clean up main and improve error handling --- PFERD/__main__.py | 96 +++++++++++++++++++++++++++++++++-------------- PFERD/config.py | 62 ++++++++++++++++-------------- PFERD/logging.py | 23 ++++++++++++ 3 files changed, 125 insertions(+), 56 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 9c60c63..c418095 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -4,15 
+4,13 @@ import configparser from pathlib import Path from .cli import PARSER, load_default_section -from .config import Config, ConfigDumpException, ConfigLoadException +from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError from .logging import log from .pferd import Pferd from .version import NAME, VERSION -def load_parser( - args: argparse.Namespace, -) -> configparser.ConfigParser: +def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser: log.explain_topic("Loading config") parser = configparser.ConfigParser() @@ -47,46 +45,88 @@ def prune_crawlers( # TODO Check if crawlers actually exist -def main() -> None: - args = PARSER.parse_args() +def load_config(args: argparse.Namespace) -> Config: + try: + return Config(load_config_parser(args)) + except ConfigLoadError as e: + log.error(str(e)) + log.error_contd(e.reason) + exit(1) - # Configure log levels set by command line arguments + +def configure_logging_from_args(args: argparse.Namespace) -> None: if args.explain is not None: log.output_explain = args.explain - if args.dump_config: + + # We want to prevent any unnecessary output if we're printing the config to + # stdout, otherwise it would not be a valid config file. + if args.dump_config == "-": log.output_explain = False + +def configure_logging_from_config(args: argparse.Namespace, config: Config) -> None: + # In configure_logging_from_args(), all normal logging is already disabled + # whenever we dump the config. We don't want to override that decision with + # values from the config file. 
+ if args.dump_config == "-": + return + + try: + if args.explain is None: + log.output_explain = config.default_section.explain() + except ConfigOptionError as e: + log.error(str(e)) + exit(1) + + +def dump_config(args: argparse.Namespace, config: Config) -> None: + try: + if args.dump_config is True: + config.dump() + elif args.dump_config == "-": + config.dump_to_stdout() + else: + config.dump(Path(args.dump_config)) + except ConfigDumpError as e: + log.error(str(e)) + log.error_contd(e.reason) + exit(1) + + +def main() -> None: + args = PARSER.parse_args() + if args.version: print(f"{NAME} {VERSION}") exit() - try: - config = Config(load_parser(args)) - except ConfigLoadException as e: - log.error(f"Failed to load config file at path {str(e.path)!r}") - log.error_contd(f"Reason: {e.reason}") - exit(1) + # Configuring logging happens in two stages because CLI args have + # precedence over config file options and loading the config already + # produces some kinds of log messages (usually only explain()-s). + configure_logging_from_args(args) - # Configure log levels set in the config file - # TODO Catch config section exceptions - if args.explain is None: - log.output_explain = config.default_section.explain() + config = load_config(args) + + # Now, after loading the config file, we can apply its logging settings in + # all places that were not already covered by CLI args. 
+ configure_logging_from_config(args, config) if args.dump_config is not None: - try: - if args.dump_config is True: - config.dump() - elif args.dump_config == "-": - config.dump_to_stdout() - else: - config.dump(Path(args.dump_config)) - except ConfigDumpException: - exit(1) + dump_config(args, config) exit() + # TODO Unset exclusive output on exceptions (if it was being held) pferd = Pferd(config) try: asyncio.run(pferd.run()) except KeyboardInterrupt: + log.explain_topic("Interrupted, exiting immediately") + log.explain("Open files and connections are left for the OS to clean up") + log.explain("Temporary files are not cleaned up") # TODO Clean up tmp files - pass + # And when those files *do* actually get cleaned up properly, + # reconsider what exit code to use here. + exit(1) + except Exception: + log.unexpected_exception() + exit(1) diff --git a/PFERD/config.py b/PFERD/config.py index 30ae3fb..26a9eb6 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -2,7 +2,6 @@ import asyncio import os import sys from configparser import ConfigParser, SectionProxy -from dataclasses import dataclass from pathlib import Path from typing import Any, List, NoReturn, Optional, Tuple @@ -10,21 +9,34 @@ from .logging import log from .utils import prompt_yes_no -@dataclass -class ConfigLoadException(Exception): - path: Path - reason: str +class ConfigLoadError(Exception): + """ + Something went wrong while loading the config from a file. + """ + + def __init__(self, path: Path, reason: str): + super().__init__(f"Failed to load config from {path}") + self.path = path + self.reason = reason -class ConfigDumpException(Exception): - pass +class ConfigOptionError(Exception): + """ + An option in the config file has an invalid or missing value. 
+ """ + + def __init__(self, section: str, key: str, desc: str): + super().__init__(f"Section {section!r}, key {key!r}: {desc}") + self.section = section + self.key = key + self.desc = desc -@dataclass -class ConfigFormatException(Exception): - section: str - key: str - desc: str +class ConfigDumpError(Exception): + def __init__(self, path: Path, reason: str): + super().__init__(f"Failed to dump config to {path}") + self.path = path + self.reason = reason class Section: @@ -36,7 +48,7 @@ class Section: self.s = section def error(self, key: str, desc: str) -> NoReturn: - raise ConfigFormatException(self.s.name, key, desc) + raise ConfigOptionError(self.s.name, key, desc) def invalid_value( self, @@ -83,7 +95,7 @@ class Config: @staticmethod def load_parser(parser: ConfigParser, path: Optional[Path] = None) -> None: """ - May throw a ConfigLoadException. + May throw a ConfigLoadError. """ if path: @@ -99,21 +111,15 @@ class Config: with open(path) as f: parser.read_file(f, source=str(path)) except FileNotFoundError: - raise ConfigLoadException(path, "File does not exist") + raise ConfigLoadError(path, "File does not exist") except IsADirectoryError: - raise ConfigLoadException(path, "That's a directory, not a file") + raise ConfigLoadError(path, "That's a directory, not a file") except PermissionError: - raise ConfigLoadException(path, "Insufficient permissions") - - @staticmethod - def _fail_dump(path: Path, reason: str) -> None: - print(f"Failed to dump config file to {path}") - print(f"Reason: {reason}") - raise ConfigDumpException() + raise ConfigLoadError(path, "Insufficient permissions") def dump(self, path: Optional[Path] = None) -> None: """ - May throw a ConfigDumpException. + May throw a ConfigDumpError. 
""" if not path: @@ -124,7 +130,7 @@ class Config: try: path.parent.mkdir(parents=True, exist_ok=True) except PermissionError: - self._fail_dump(path, "Could not create parent directory") + raise ConfigDumpError(path, "Could not create parent directory") try: # Ensuring we don't accidentally overwrite any existing files by @@ -140,11 +146,11 @@ class Config: with open(path, "w") as f: self._parser.write(f) else: - self._fail_dump(path, "File already exists") + raise ConfigDumpError(path, "File already exists") except IsADirectoryError: - self._fail_dump(path, "That's a directory, not a file") + raise ConfigDumpError(path, "That's a directory, not a file") except PermissionError: - self._fail_dump(path, "Insufficient permissions") + raise ConfigDumpError(path, "Insufficient permissions") def dump_to_stdout(self) -> None: self._parser.write(sys.stdout) diff --git a/PFERD/logging.py b/PFERD/logging.py index e2a6d33..e1ab92f 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -1,4 +1,6 @@ import asyncio +import sys +import traceback from contextlib import asynccontextmanager, contextmanager # TODO In Python 3.9 and above, ContextManager and AsyncContextManager are deprecated from typing import AsyncIterator, ContextManager, Iterator, List, Optional @@ -110,6 +112,27 @@ class Log: def error_contd(self, text: str) -> None: self.print(f"[red]{escape(text)}") + def unexpected_exception(self) -> None: + t, v, tb = sys.exc_info() + + self.error("An unexpected exception occurred") + self.error_contd("") + + for line in traceback.format_tb(tb): + self.error_contd(line[:-1]) # Without trailing newline + + if str(v): + self.error_contd(f"{t.__name__}: {v}") + else: + self.error_contd(t.__name__) + + self.error_contd("") + self.error_contd(""" +An unexpected exception occurred. This usually shouldn't happen. 
Please copy +your program output and send it to the PFERD maintainers, either directly or as +a GitHub issue: https://github.com/Garmelon/PFERD/issues/new + """.strip()) + def explain_topic(self, text: str) -> None: if self.output_explain: self.print(f"[cyan]{escape(text)}") From dfde0e23107a85455489058918d7731e264355ff Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 18:36:25 +0200 Subject: [PATCH 163/524] Improve reporting of unexpected exceptions --- PFERD/logging.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/PFERD/logging.py b/PFERD/logging.py index e1ab92f..8d89baf 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -113,24 +113,27 @@ class Log: self.print(f"[red]{escape(text)}") def unexpected_exception(self) -> None: + """ + Call this in an "except" clause to log an unexpected exception. + """ + t, v, tb = sys.exc_info() - - self.error("An unexpected exception occurred") - self.error_contd("") - - for line in traceback.format_tb(tb): - self.error_contd(line[:-1]) # Without trailing newline - - if str(v): - self.error_contd(f"{t.__name__}: {v}") + if t is None or v is None or tb is None: + # We're not currently handling an exception, so somebody probably + # called this function where they shouldn't. + self.error("Something unexpected happened") + self.error_contd("") + for line in traceback.format_stack(): + self.error_contd(line[:-1]) # Without the newline + self.error_contd("") else: - self.error_contd(t.__name__) + self.error("An unexpected exception occurred") + self.error_contd("") + self.error_contd(traceback.format_exc()) - self.error_contd("") self.error_contd(""" -An unexpected exception occurred. This usually shouldn't happen. 
Please copy -your program output and send it to the PFERD maintainers, either directly or as -a GitHub issue: https://github.com/Garmelon/PFERD/issues/new +Please copy your program output and send it to the PFERD maintainers, either +directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new """.strip()) def explain_topic(self, text: str) -> None: From 552cd82802dad0c53c8cdb971d70f70bff1dc5da Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 18:37:53 +0200 Subject: [PATCH 164/524] Run async input and password getters in daemon thread Previously, it ran in the event loop's default executor, which would block until all its workers were done working. If Ctrl+C was pressed while input or a password were being read, the asyncio.run() call in the main thread would be interrupted however, not the input thread. This meant that multiple key presses (either enter or a second Ctrl+C) were necessary to stop a running PFERD in some circumstances. This change instead runs the input functions in daemon threads so they exit as soon as the main thread exits. 
--- PFERD/utils.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/PFERD/utils.py b/PFERD/utils.py index 56d6f53..1d11565 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -1,8 +1,7 @@ import asyncio -import contextvars -import functools import getpass import sys +import threading from abc import ABC, abstractmethod from contextlib import AsyncExitStack from types import TracebackType @@ -14,21 +13,25 @@ import bs4 T = TypeVar("T") -# TODO When switching to 3.9, use asyncio.to_thread instead of this -async def to_thread(func: Callable[..., T], *args: Any, **kwargs: Any) -> T: - # https://github.com/python/cpython/blob/8d47f92d46a92a5931b8f3dcb4a484df672fc4de/Lib/asyncio/threads.py - loop = asyncio.get_event_loop() - ctx = contextvars.copy_context() - func_call = functools.partial(ctx.run, func, *args, **kwargs) - return await loop.run_in_executor(None, func_call) # type: ignore +async def in_daemon_thread(func: Callable[..., T], *args: Any, **kwargs: Any) -> T: + loop = asyncio.get_running_loop() + future: asyncio.Future[T] = asyncio.Future() + + def thread_func() -> None: + result = func() + loop.call_soon_threadsafe(future.set_result, result) + + threading.Thread(target=thread_func, daemon=True).start() + + return await future async def ainput(prompt: str) -> str: - return await to_thread(lambda: input(prompt)) + return await in_daemon_thread(lambda: input(prompt)) async def agetpass(prompt: str) -> str: - return await to_thread(lambda: getpass.getpass(prompt)) + return await in_daemon_thread(lambda: getpass.getpass(prompt)) def soupify(data: bytes) -> bs4.BeautifulSoup: From afac22c5626b1967001ae9d9fe4bf975a35c9701 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 18:58:00 +0200 Subject: [PATCH 165/524] Handle abort in exclusive output state correctly If the event loop is stopped while something holds the exclusive output, the "log" singleton is now reset so the main thread can print a few more messages 
before exiting. --- PFERD/__main__.py | 3 ++- PFERD/logging.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index c418095..d588836 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -115,11 +115,11 @@ def main() -> None: dump_config(args, config) exit() - # TODO Unset exclusive output on exceptions (if it was being held) pferd = Pferd(config) try: asyncio.run(pferd.run()) except KeyboardInterrupt: + log.unlock() log.explain_topic("Interrupted, exiting immediately") log.explain("Open files and connections are left for the OS to clean up") log.explain("Temporary files are not cleaned up") @@ -128,5 +128,6 @@ def main() -> None: # reconsider what exit code to use here. exit(1) except Exception: + log.unlock() log.unexpected_exception() exit(1) diff --git a/PFERD/logging.py b/PFERD/logging.py index 8d89baf..beb92c6 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -97,12 +97,28 @@ class Log: self.print(line) self._lines = [] + def unlock(self) -> None: + """ + Get rid of an exclusive output state. + + This function is meant to let PFERD print log messages after the event + loop was forcibly stopped and if it will not be started up again. After + this is called, it is not safe to use any functions except the logging + functions (print, warn, ...). + """ + + self._progress_suspended = False + for line in self._lines: + self.print(line) + def print(self, text: str) -> None: if self._progress_suspended: self._lines.append(text) else: self.console.print(text) + # TODO Print errors (and warnings?) 
to stderr + def warn(self, text: str) -> None: self.print(f"[bold bright_red]Warning[/] {escape(text)}") From b4d97cd545a03a2e8bb2fee1b43cf6a1d431dbeb Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 20:54:42 +0200 Subject: [PATCH 166/524] Improve output dir and report error handling --- PFERD/crawler.py | 11 +++++------ PFERD/output_dir.py | 31 +++++++++++-------------------- PFERD/report.py | 25 +++++++++++++------------ 3 files changed, 29 insertions(+), 38 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 2785e41..1269ba2 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -13,7 +13,8 @@ from .authenticator import Authenticator from .config import Config, Section from .limiter import Limiter from .logging import ProgressBar, log -from .output_dir import FileSink, OnConflict, OutputDirectory, Redownload +from .output_dir import FileSink, OnConflict, OutputDirectory, OutputDirError, Redownload +from .report import MarkConflictError, MarkDuplicateError from .transformer import Transformer from .version import NAME, VERSION @@ -45,12 +46,10 @@ def noncritical(f: Wrapped) -> Wrapped: try: f(*args, **kwargs) - except CrawlWarning as e: - log.print(f"[bold bright_red]Warning[/] {escape(str(e))}") + except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: + log.warn(str(e)) crawler.error_free = False - except CrawlError as e: - # TODO Don't print error, just pass it on upwards - log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}") + except CrawlError: crawler.error_free = False raise diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 783d6bc..ee4910e 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -14,7 +14,7 @@ from typing import AsyncContextManager, BinaryIO, Iterator, Optional, Tuple from rich.markup import escape from .logging import log -from .report import MarkConflictException, MarkDuplicateException, Report +from .report import Report from .utils import 
ReusableAsyncContextManager, prompt_yes_no SUFFIX_CHARS = string.ascii_lowercase + string.digits @@ -22,7 +22,7 @@ SUFFIX_LENGTH = 6 TRIES = 5 -class OutputDirException(Exception): +class OutputDirError(Exception): pass @@ -146,25 +146,15 @@ class OutputDirectory: def register_reserved(self, path: PurePath) -> None: self._report.mark_reserved(path) - def _mark(self, path: PurePath) -> None: - """ - May throw an OutputDirException - """ - - try: - self._report.mark(path) - except MarkDuplicateException: - raise OutputDirException("Another file has already been placed here.") - except MarkConflictException as e: - raise OutputDirException(f"Collides with other file: {e.collides_with}") - def resolve(self, path: PurePath) -> Path: """ - May throw an OutputDirException. + May throw an OutputDirError. """ if ".." in path.parts: - raise OutputDirException(f"Path {path} contains forbidden '..'") + raise OutputDirError(f"Forbidden segment '..' in path {path}") + if "." in path.parts: + raise OutputDirError(f"Forbidden segment '.' in path {path}") return self._root / path def _should_download( @@ -297,7 +287,7 @@ class OutputDirectory: local_path: Path, ) -> Tuple[Path, BinaryIO]: """ - May raise an OutputDirException. + May raise an OutputDirError. """ # Create tmp file @@ -309,7 +299,7 @@ class OutputDirectory: except FileExistsError: pass # Try again - raise OutputDirException(f"Failed to create temporary file {tmp_path}") + raise OutputDirError("Failed to create temporary file") async def download( self, @@ -319,7 +309,8 @@ class OutputDirectory: on_conflict: Optional[OnConflict] = None, ) -> Optional[AsyncContextManager[FileSink]]: """ - May throw an OutputDirException. + May throw an OutputDirError, a MarkDuplicateError or a + MarkConflictError. 
""" heuristics = Heuristics(mtime) @@ -327,7 +318,7 @@ class OutputDirectory: on_conflict = self._on_conflict if on_conflict is None else on_conflict local_path = self.resolve(path) - self._mark(path) + self._report.mark(path) if not self._should_download(local_path, heuristics, redownload): return None diff --git a/PFERD/report.py b/PFERD/report.py index 1c46216..7d8aa85 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -1,19 +1,18 @@ -from dataclasses import dataclass from pathlib import PurePath from typing import Set -@dataclass -class MarkDuplicateException(Exception): +class MarkDuplicateError(Exception): """ Tried to mark a file that was already marked. """ - path: PurePath + def __init__(self, path: PurePath): + super().__init__(f"A previous file already used path {path}") + self.path = path -@dataclass -class MarkConflictException(Exception): +class MarkConflictError(Exception): """ Marking the path would have caused a conflict. @@ -24,8 +23,10 @@ class MarkConflictException(Exception): usually not possible. """ - path: PurePath - collides_with: PurePath + def __init__(self, path: PurePath, collides_with: PurePath): + super().__init__(f"File at {path} collides with previous file at {collides_with}") + self.path = path + self.collides_with = collides_with # TODO Use PurePath.is_relative_to when updating to 3.9 @@ -58,16 +59,16 @@ class Report: """ Mark a previously unknown file as known. - May throw a MarkDuplicateException or a MarkConflictException. For more - detail, see the respective exception's docstring. + May throw a MarkDuplicateError or a MarkConflictError. For more detail, + see the respective exception's docstring. 
""" for other in self.marked: if path == other: - raise MarkDuplicateException(path) + raise MarkDuplicateError(path) if is_relative_to(path, other) or is_relative_to(other, path): - raise MarkConflictException(path, other) + raise MarkConflictError(path, other) self.known_files.add(path) From 9889ce6b57b041e9ba84c003855876a155273ba9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 21:05:32 +0200 Subject: [PATCH 167/524] Improve PFERD error handling --- PFERD/__main__.py | 7 ++++- PFERD/pferd.py | 65 +++++++++++++++++++---------------------------- 2 files changed, 32 insertions(+), 40 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index d588836..9a307b2 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -115,7 +115,12 @@ def main() -> None: dump_config(args, config) exit() - pferd = Pferd(config) + try: + pferd = Pferd(config) + except ConfigOptionError as e: + log.error(str(e)) + exit(1) + try: asyncio.run(pferd.run()) except KeyboardInterrupt: diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 10cd1c2..20c770f 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -1,71 +1,58 @@ from typing import Dict -from rich import print from rich.markup import escape from .authenticator import Authenticator from .authenticators import AUTHENTICATORS -from .config import Config -from .crawler import Crawler +from .config import Config, ConfigOptionError +from .crawler import Crawler, CrawlError from .crawlers import CRAWLERS - - -class PferdLoadException(Exception): - pass +from .logging import log class Pferd: def __init__(self, config: Config): + """ + May throw ConfigOptionError. 
+ """ + self._config = config self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} + self._load_authenticators() + self._load_crawlers() + def _load_authenticators(self) -> None: - abort = False for name, section in self._config.authenticator_sections(): - print(f"[bold bright_cyan]Loading[/] {escape(name)}") - authenticator_type = section.get("type") - authenticator_constructor = AUTHENTICATORS.get(authenticator_type) + log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") + auth_type = section.get("type") + authenticator_constructor = AUTHENTICATORS.get(auth_type) if authenticator_constructor is None: - abort = True - t = escape(repr(authenticator_type)) - print(f"[red]Error: Unknown authenticator type {t}") - continue + raise ConfigOptionError(name, "type", f"Unknown authenticator type: {auth_type!r}") authenticator = authenticator_constructor(name, section, self._config) self._authenticators[name] = authenticator - if abort: - raise PferdLoadException() - def _load_crawlers(self) -> None: - abort = False for name, section in self._config.crawler_sections(): - print(f"[bold bright_cyan]Loading[/] {escape(name)}") - crawler_type = section.get("type") - crawler_constructor = CRAWLERS.get(crawler_type) + log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") + crawl_type = section.get("type") + crawler_constructor = CRAWLERS.get(crawl_type) if crawler_constructor is None: - abort = True - t = escape(repr(crawler_type)) - print(f"[red]Error: Unknown crawler type {t}") - continue + raise ConfigOptionError(name, "type", f"Unknown crawler type: {crawl_type!r}") crawler = crawler_constructor(name, section, self._config, self._authenticators) self._crawlers[name] = crawler - if abort: - raise PferdLoadException() - async def run(self) -> None: - try: - self._load_authenticators() - self._load_crawlers() - except PferdLoadException: - print("[bold red]Could not initialize PFERD properly") - exit(1) - for name, crawler in 
self._crawlers.items(): - print() - print(f"[bold bright_cyan]Running[/] {escape(name)}") + log.print("") + log.print(f"[bold bright_cyan]Running[/] {escape(name)}") - await crawler.run() + try: + await crawler.run() + except CrawlError as e: + log.error(str(e)) + except Exception: + log.unexpected_exception() From 098ac45758a73f145fc15b67d911b1c9698a8f2d Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 21:06:13 +0200 Subject: [PATCH 168/524] Remove deprecated repeat decorators --- PFERD/crawler.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 1269ba2..f5286b8 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -316,33 +316,3 @@ class HttpCrawler(Crawler): cookie_jar.save(self._cookie_jar_path) except Exception: log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") - - -def repeat(attempts: int) -> Callable[[Wrapped], Wrapped]: - """Deprecated.""" - def decorator(f: Wrapped) -> Wrapped: - def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: - for _ in range(attempts - 1): - try: - f(self, *args, **kwargs) - return - except Exception: - pass - f(self, *args, **kwargs) - return wrapper # type: ignore - return decorator - - -def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]: - """Deprecated.""" - def decorator(f: AWrapped) -> AWrapped: - async def wrapper(self: "Crawler", *args: Any, **kwargs: Any) -> None: - for _ in range(attempts - 1): - try: - await f(self, *args, **kwargs) - return - except Exception: - pass - await f(self, *args, **kwargs) - return wrapper # type: ignore - return decorator From ec95dda18f0f65dd6c5638aba0eb5a9c86f03a3a Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 21:36:53 +0200 Subject: [PATCH 169/524] Unify crawling and downloading steps Now, the progress bar, limiter etc. 
for downloading and crawling are all handled via the reusable CrawlToken and DownloadToken context managers. --- PFERD/crawler.py | 79 +++++++++++++++++++++++++++------------------ PFERD/output_dir.py | 5 ++- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index f5286b8..42f66a3 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -1,10 +1,8 @@ import asyncio from abc import ABC, abstractmethod -from contextlib import asynccontextmanager from datetime import datetime from pathlib import Path, PurePath -# TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import Any, AsyncContextManager, AsyncIterator, Awaitable, Callable, Dict, Optional, TypeVar +from typing import Any, Awaitable, Callable, Dict, Optional, Tuple, TypeVar import aiohttp from rich.markup import escape @@ -13,9 +11,10 @@ from .authenticator import Authenticator from .config import Config, Section from .limiter import Limiter from .logging import ProgressBar, log -from .output_dir import FileSink, OnConflict, OutputDirectory, OutputDirError, Redownload +from .output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload from .report import MarkConflictError, MarkDuplicateError from .transformer import Transformer +from .utils import ReusableAsyncContextManager from .version import NAME, VERSION @@ -88,6 +87,36 @@ def anoncritical(f: AWrapped) -> AWrapped: return wrapper # type: ignore +class CrawlToken(ReusableAsyncContextManager[ProgressBar]): + def __init__(self, limiter: Limiter, desc: str): + super().__init__() + + self._limiter = limiter + self._desc = desc + + async def _on_aenter(self) -> ProgressBar: + await self._stack.enter_async_context(self._limiter.limit_crawl()) + bar = self._stack.enter_context(log.crawl_bar(self._desc)) + + return bar + + +class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]): + def __init__(self, limiter: Limiter, fs_token: 
FileSinkToken, desc: str): + super().__init__() + + self._limiter = limiter + self._fs_token = fs_token + self._desc = desc + + async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]: + await self._stack.enter_async_context(self._limiter.limit_crawl()) + sink = await self._stack.enter_async_context(self._fs_token) + bar = self._stack.enter_context(log.crawl_bar(self._desc)) + + return bar, sink + + class CrawlerSection(Section): def output_dir(self, name: str) -> Path: # TODO Use removeprefix() after switching to 3.9 @@ -190,30 +219,12 @@ class Crawler(ABC): section.on_conflict(), ) - @asynccontextmanager - async def crawl_bar( - self, - path: PurePath, - total: Optional[int] = None, - ) -> AsyncIterator[ProgressBar]: + async def crawl(self, path: PurePath) -> Optional[CrawlToken]: + if self._transformer.transform(path) is None: + return None + desc = f"[bold bright_cyan]Crawling[/] {escape(str(path))}" - async with self._limiter.limit_crawl(): - with log.crawl_bar(desc, total=total) as bar: - yield bar - - @asynccontextmanager - async def download_bar( - self, - path: PurePath, - total: Optional[int] = None, - ) -> AsyncIterator[ProgressBar]: - desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}" - async with self._limiter.limit_download(): - with log.download_bar(desc, total=total) as bar: - yield bar - - def should_crawl(self, path: PurePath) -> bool: - return self._transformer.transform(path) is not None + return CrawlToken(self._limiter, desc) async def download( self, @@ -221,13 +232,17 @@ class Crawler(ABC): mtime: Optional[datetime] = None, redownload: Optional[Redownload] = None, on_conflict: Optional[OnConflict] = None, - ) -> Optional[AsyncContextManager[FileSink]]: + ) -> Optional[DownloadToken]: transformed_path = self._transformer.transform(path) if transformed_path is None: return None - return await self._output_dir.download( - transformed_path, mtime, redownload, on_conflict) + fs_token = await 
self._output_dir.download(transformed_path, mtime, redownload, on_conflict) + if fs_token is None: + return None + + desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}" + return DownloadToken(self._limiter, fs_token, desc) async def cleanup(self) -> None: await self._output_dir.cleanup() @@ -239,10 +254,10 @@ class Crawler(ABC): """ with log.show_progress(): - await self.crawl() + await self._run() @abstractmethod - async def crawl(self) -> None: + async def _run(self) -> None: """ Overwrite this function if you are writing a crawler. diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index ee4910e..fef6914 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -8,8 +8,7 @@ from dataclasses import dataclass from datetime import datetime from enum import Enum from pathlib import Path, PurePath -# TODO In Python 3.9 and above, AsyncContextManager is deprecated -from typing import AsyncContextManager, BinaryIO, Iterator, Optional, Tuple +from typing import BinaryIO, Iterator, Optional, Tuple from rich.markup import escape @@ -307,7 +306,7 @@ class OutputDirectory: mtime: Optional[datetime] = None, redownload: Optional[Redownload] = None, on_conflict: Optional[OnConflict] = None, - ) -> Optional[AsyncContextManager[FileSink]]: + ) -> Optional[FileSinkToken]: """ May throw an OutputDirError, a MarkDuplicateError or a MarkConflictError. 
From e21795ee357f06526915dcc829c8ff78bfb0f7ca Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 21:45:51 +0200 Subject: [PATCH 170/524] Make file cleanup part of default crawler behaviour --- PFERD/crawler.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 42f66a3..ec0e147 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -244,8 +244,15 @@ class Crawler(ABC): desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}" return DownloadToken(self._limiter, fs_token, desc) - async def cleanup(self) -> None: - await self._output_dir.cleanup() + async def _cleanup(self) -> None: + log.explain_topic("Decision: Clean up files?") + if self.error_free: + log.explain("No warnings or errors occurred during this run") + log.explain("Cleaning up files") + await self._output_dir.cleanup() + else: + log.explain("Warnings or errors occurred during this run") + log.explain("Not cleaning up files") async def run(self) -> None: """ @@ -255,6 +262,7 @@ class Crawler(ABC): with log.show_progress(): await self._run() + await self._cleanup() @abstractmethod async def _run(self) -> None: From ae3d80664cee6b03023c854600f157fd83c1c87f Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 21:46:05 +0200 Subject: [PATCH 171/524] Update local crawler to new crawler structure --- PFERD/crawlers/local.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 8cfc79a..176f36d 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -56,10 +56,8 @@ class LocalCrawler(Crawler): else: self._block_size = 1024**2 # 1 MiB - async def crawl(self) -> None: + async def _run(self) -> None: await self._crawl_path(self._target, PurePath()) - if self.error_free: - await self.cleanup() @anoncritical async def _crawl_path(self, path: Path, pure: PurePath) -> None: @@ -69,9 +67,13 @@ class 
LocalCrawler(Crawler): await self._crawl_file(path, pure) async def _crawl_dir(self, path: Path, pure: PurePath) -> None: + cl = await self.crawl(pure) + if not cl: + return + tasks = [] - async with self.crawl_bar(pure): + async with cl: await asyncio.sleep(random.uniform( 0.5 * self._crawl_delay, self._crawl_delay, @@ -79,8 +81,7 @@ class LocalCrawler(Crawler): for child in path.iterdir(): pure_child = pure / child.name - if self.should_crawl(child): - tasks.append(self._crawl_path(child, pure_child)) + tasks.append(self._crawl_path(child, pure_child)) await asyncio.gather(*tasks) @@ -91,7 +92,7 @@ class LocalCrawler(Crawler): if not dl: return - async with self.download_bar(pure) as bar: + async with dl as (bar, sink): await asyncio.sleep(random.uniform( 0.5 * self._download_delay, self._download_delay, @@ -99,19 +100,18 @@ class LocalCrawler(Crawler): bar.set_total(stat.st_size) - async with dl as sink: - with open(path, "rb") as f: - while True: - data = f.read(self._block_size) - if len(data) == 0: - break + with open(path, "rb") as f: + while True: + data = f.read(self._block_size) + if len(data) == 0: + break - sink.file.write(data) - bar.advance(len(data)) + sink.file.write(data) + bar.advance(len(data)) - if self._download_speed: - delay = self._block_size / self._download_speed - delay = random.uniform(0.8 * delay, 1.2 * delay) - await asyncio.sleep(delay) + if self._download_speed: + delay = self._block_size / self._download_speed + delay = random.uniform(0.8 * delay, 1.2 * delay) + await asyncio.sleep(delay) - sink.done() + sink.done() From 8fad8edc1e2ddee4fb46fa85b9f23a65a421d196 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 20:02:15 +0000 Subject: [PATCH 172/524] Remove duplicated beautifulsoup4 dependency --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index cb85ab0..431c3b9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,7 +9,6 @@ install_requires = aiohttp>=3.7.4.post0 beautifulsoup4>=4.9.3 
rich>=10.1.0 - beautifulsoup4>=4.9.3 [options.entry_points] console_scripts = From 662191eca9fa38172e541d9eaa7f217301aaef19 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 20:25:58 +0000 Subject: [PATCH 173/524] Fix crash as soon as first cl or dl token was acquired --- PFERD/__main__.py | 5 ++--- PFERD/pferd.py | 9 ++++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 9a307b2..0e84e34 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -117,12 +117,11 @@ def main() -> None: try: pferd = Pferd(config) + asyncio.run(pferd.run()) except ConfigOptionError as e: + log.unlock() log.error(str(e)) exit(1) - - try: - asyncio.run(pferd.run()) except KeyboardInterrupt: log.unlock() log.explain_topic("Interrupted, exiting immediately") diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 20c770f..4aee043 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -20,9 +20,6 @@ class Pferd: self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} - self._load_authenticators() - self._load_crawlers() - def _load_authenticators(self) -> None: for name, section in self._config.authenticator_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") @@ -46,6 +43,12 @@ class Pferd: self._crawlers[name] = crawler async def run(self) -> None: + # These two functions must run inside the same event loop as the + # crawlers, so that any new objects (like Conditions or Futures) can + # obtain the correct event loop. 
+ self._load_authenticators() + self._load_crawlers() + for name, crawler in self._crawlers.items(): log.print("") log.print(f"[bold bright_cyan]Running[/] {escape(name)}") From 1bbc0b705f29452ed105d95687343173b8af60d7 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 20:38:56 +0000 Subject: [PATCH 174/524] Improve transformer error handling --- PFERD/__main__.py | 5 +++++ PFERD/transformer.py | 33 ++++++++++++++++++--------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 0e84e34..5cc6ef6 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -7,6 +7,7 @@ from .cli import PARSER, load_default_section from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError from .logging import log from .pferd import Pferd +from .transformer import RuleParseError from .version import NAME, VERSION @@ -122,6 +123,10 @@ def main() -> None: log.unlock() log.error(str(e)) exit(1) + except RuleParseError as e: + log.unlock() + e.pretty_print() + exit(1) except KeyboardInterrupt: log.unlock() log.explain_topic("Interrupted, exiting immediately") diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 135baf2..293274a 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -6,10 +6,11 @@ import ast import re from abc import ABC, abstractmethod -from dataclasses import dataclass from pathlib import PurePath from typing import Dict, Optional, Union +from .logging import log + class Rule(ABC): @abstractmethod @@ -122,16 +123,18 @@ class ReRule(Rule): return False -@dataclass -class RuleParseException(Exception): - line: "Line" - reason: str +class RuleParseError(Exception): + def __init__(self, line: "Line", reason: str): + super().__init__(f"Error in rule on line {line.line_nr}, column {line.index}: {reason}") + + self.line = line + self.reason = reason def pretty_print(self) -> None: - print(f"Error parsing rule on line {self.line.line_nr}:") - print(self.line.line) + 
log.error(f"Error parsing rule on line {self.line.line_nr}:") + log.error_contd(self.line.line) spaces = " " * self.line.index - print(f"{spaces}^--- {self.reason}") + log.error_contd(f"{spaces}^--- {self.reason}") class Line: @@ -170,7 +173,7 @@ class Line: if self.get() == char: self.advance() else: - raise RuleParseException(self, f"Expected {char!r}") + raise RuleParseError(self, f"Expected {char!r}") QUOTATION_MARKS = {'"', "'"} @@ -186,7 +189,7 @@ def parse_string_literal(line: Line) -> str: if quotation_mark not in QUOTATION_MARKS: # This should never happen as long as this function is only called from # parse_string. - raise RuleParseException(line, "Invalid quotation mark") + raise RuleParseError(line, "Invalid quotation mark") line.advance() while c := line.get(): @@ -204,7 +207,7 @@ def parse_string_literal(line: Line) -> str: else: line.advance() - raise RuleParseException(line, "Expected end of string literal") + raise RuleParseError(line, "Expected end of string literal") def parse_until_space_or_eol(line: Line) -> str: @@ -235,12 +238,12 @@ def parse_arrow(line: Line) -> str: while True: c = line.get() if not c: - raise RuleParseException(line, "Expected rest of arrow") + raise RuleParseError(line, "Expected rest of arrow") elif c == "-": line.advance() c = line.get() if not c: - raise RuleParseException(line, "Expected rest of arrow") + raise RuleParseError(line, "Expected rest of arrow") elif c == ">": line.advance() break # End of arrow @@ -267,7 +270,7 @@ def parse_rule(line: Line) -> Rule: left = parse_string(line) if isinstance(left, bool): line.index = leftindex - raise RuleParseException(line, "Left side can't be '!'") + raise RuleParseError(line, "Left side can't be '!'") # Parse arrow parse_whitespace(line) @@ -301,7 +304,7 @@ def parse_rule(line: Line) -> Rule: return NameRule(ReRule(left, right)) else: line.index = arrowindex + 1 # For nicer error message - raise RuleParseException(line, "Invalid arrow name") + raise RuleParseError(line, 
"Invalid arrow name") class Transformer: From 9cb2b68f09504704baf738e71e29ba8f07ec7429 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 20:39:29 +0000 Subject: [PATCH 175/524] Fix arrow parsing error messages --- PFERD/transformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 293274a..130473a 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -249,7 +249,7 @@ def parse_arrow(line: Line) -> str: break # End of arrow else: name.append("-") - name.append(c) + continue else: name.append(c) @@ -304,7 +304,7 @@ def parse_rule(line: Line) -> Rule: return NameRule(ReRule(left, right)) else: line.index = arrowindex + 1 # For nicer error message - raise RuleParseError(line, "Invalid arrow name") + raise RuleParseError(line, f"Invalid arrow name {arrowname!r}") class Transformer: From 62f0f7bfc5d539ebac21341dbdb3873f86cef5df Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 20:39:57 +0000 Subject: [PATCH 176/524] Explain crawling and partially explain downloading --- PFERD/crawler.py | 17 ++++++++++++++--- PFERD/transformer.py | 10 ++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index ec0e147..4095c53 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -220,9 +220,14 @@ class Crawler(ABC): ) async def crawl(self, path: PurePath) -> Optional[CrawlToken]: + log.explain_topic(f"Decision: Crawl {path}") + if self._transformer.transform(path) is None: + log.explain("Answer: No") return None + log.explain("Answer: Yes") + desc = f"[bold bright_cyan]Crawling[/] {escape(str(path))}" return CrawlToken(self._limiter, desc) @@ -233,26 +238,32 @@ class Crawler(ABC): redownload: Optional[Redownload] = None, on_conflict: Optional[OnConflict] = None, ) -> Optional[DownloadToken]: + log.explain_topic(f"Decision: Download {path}") + transformed_path = self._transformer.transform(path) if transformed_path is None: + 
log.explain("Answer: No") return None fs_token = await self._output_dir.download(transformed_path, mtime, redownload, on_conflict) if fs_token is None: + log.explain("Answer: No") return None + log.explain("Answer: Yes") + desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}" return DownloadToken(self._limiter, fs_token, desc) async def _cleanup(self) -> None: - log.explain_topic("Decision: Clean up files?") + log.explain_topic("Decision: Clean up files") if self.error_free: log.explain("No warnings or errors occurred during this run") - log.explain("Cleaning up files") + log.explain("Answer: Yes") await self._output_dir.cleanup() else: log.explain("Warnings or errors occurred during this run") - log.explain("Not cleaning up files") + log.explain("Answer: No") async def run(self) -> None: """ diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 130473a..d7d3be8 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -317,16 +317,22 @@ class Transformer: for i, line in enumerate(rules.split("\n")): line = line.strip() if line: - self._rules.append(parse_rule(Line(line, i))) + rule = parse_rule(Line(line, i)) + self._rules.append((line, rule)) def transform(self, path: PurePath) -> Optional[PurePath]: - for rule in self._rules: + for i, (line, rule) in enumerate(self._rules): + log.explain(f"Testing rule {i}: {line}") + result = rule.transform(path) if isinstance(result, PurePath): + log.explain(f"Match! Transformed to {result}") return result elif result: # Exclamation mark + log.explain("Match! 
Ignored") return None else: continue + log.explain("No rule matched, path is unchanged") return path From e724ff7c93b3f05d4125992a0101da2676f5239c Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 22 May 2021 20:44:59 +0000 Subject: [PATCH 177/524] Fix normal arrow --- PFERD/transformer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index d7d3be8..7b5745b 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -51,6 +51,7 @@ class NormalRule(Rule): if left_parts: return None + path_parts.reverse() return PurePath(*path_parts) def transform(self, path: PurePath) -> Union[PurePath, bool]: From 953a1bba93ba67886f4d8f67345801c055f1227d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 22 May 2021 23:18:05 +0200 Subject: [PATCH 178/524] Adjust to new crawl / download names --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 49 +++++++++---------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 0ca6565..f2125aa 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -116,7 +116,7 @@ class KitIliasWebCrawler(HttpCrawler): self._link_file_redirect_delay = section.link_file_redirect_delay() self._link_file_use_plaintext = section.link_file_use_plaintext() - async def crawl(self) -> None: + async def _run(self) -> None: if isinstance(self._target, int): log.explain_topic(f"Inferred crawl target: Course with id {self._target}") await self._crawl_course(self._target) @@ -127,11 +127,6 @@ class KitIliasWebCrawler(HttpCrawler): log.explain_topic(f"Inferred crawl target: URL {escape(self._target)}") await self._crawl_url(self._target) - if self.error_free: - await self.cleanup() - else: - log.explain_topic("Skipping file cleanup as errors occurred earlier") - async def _crawl_course(self, course_id: int) -> None: # Start crawling at the given 
course root_url = url_set_query_param( @@ -144,10 +139,14 @@ class KitIliasWebCrawler(HttpCrawler): await self._crawl_url(self._base_url) async def _crawl_url(self, url: str, expected_id: Optional[int] = None) -> None: + cl = await self.crawl(PurePath(".")) + if not cl: + return + tasks = [] # TODO: Retry this when the crawl and download bar are reworked - async with self.crawl_bar(PurePath("Root element")): + async with cl: soup = await self._get_page(url) if expected_id is not None: @@ -165,14 +164,12 @@ class KitIliasWebCrawler(HttpCrawler): await asyncio.gather(*tasks) async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: - # We might not want to crawl this directory-ish page. - # This is not in #handle_element, as the download methods check it themselves and therefore - # would perform this check twice - messing with the explain output - if not self.should_crawl(path): + cl = await self.crawl(path) + if not cl: return tasks = [] - async with self.crawl_bar(path): + async with cl: soup = await self._get_page(url) page = IliasPage(soup, url, parent) @@ -189,7 +186,9 @@ class KitIliasWebCrawler(HttpCrawler): if element.type == IliasElementType.FILE: await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: - log.explain_topic(f"Skipping forum at {escape(str(element_path))}") + log.explain_topic(f"Decision: Crawl {escape(str(element_path))}") + log.explain("Is a forum") + log.explain("Answer: No") elif element.type == IliasElementType.LINK: await self._download_link(element, element_path) elif element.type == IliasElementType.VIDEO: @@ -208,20 +207,19 @@ class KitIliasWebCrawler(HttpCrawler): if not dl: return - async with self.download_bar(element_path): + async with dl as (bar, sink): export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") async with self.session.get(export_url) as response: html_page: BeautifulSoup = soupify(await response.read()) real_url: str = 
html_page.select_one("a").get("href").strip() - async with dl as sink: - content = link_template_plain if self._link_file_use_plaintext else link_template_rich - content = content.replace("{{link}}", real_url) - content = content.replace("{{name}}", element.name) - content = content.replace("{{description}}", str(element.description)) - content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) - sink.file.write(content.encode("utf-8")) - sink.done() + content = link_template_plain if self._link_file_use_plaintext else link_template_rich + content = content.replace("{{link}}", real_url) + content = content.replace("{{name}}", element.name) + content = content.replace("{{description}}", str(element.description)) + content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) + sink.file.write(content.encode("utf-8")) + sink.done() async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: # Videos will NOT be redownloaded - their content doesn't really change and they are chunky @@ -229,19 +227,18 @@ class KitIliasWebCrawler(HttpCrawler): if not dl: return - async with self.download_bar(element_path) as bar: + async with dl as (bar, sink): page = IliasPage(await self._get_page(element.url), element.url, element) real_element = page.get_child_elements()[0] - async with dl as sink: - await self._stream_from_url(real_element.url, sink, bar) + await self._stream_from_url(real_element.url, sink, bar) async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: dl = await self.download(element_path, mtime=element.mtime) if not dl: return - async with self.download_bar(element_path) as bar, dl as sink: + async with dl as (bar, sink): await self._stream_from_url(element.url, sink, bar) async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: From 4d07de0d717195e445a9c0ad881c40d2d6ea79f2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen 
Date: Sat, 22 May 2021 23:20:21 +0200 Subject: [PATCH 179/524] Adjust forum log message in ilias crawler --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index f2125aa..9db9267 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -187,7 +187,7 @@ class KitIliasWebCrawler(HttpCrawler): await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: log.explain_topic(f"Decision: Crawl {escape(str(element_path))}") - log.explain("Is a forum") + log.explain("Forums are not supported") log.explain("Answer: No") elif element.type == IliasElementType.LINK: await self._download_link(element, element_path) From 3053278721cd6b6e61ff9f6c60eabb41e5f5ba3e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 22 May 2021 23:23:21 +0200 Subject: [PATCH 180/524] Move HTTP crawler to own file --- PFERD/crawler.py | 68 ----------------- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 3 +- PFERD/http_crawler.py | 75 +++++++++++++++++++ 3 files changed, 77 insertions(+), 69 deletions(-) create mode 100644 PFERD/http_crawler.py diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 4095c53..731cfb9 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -1,10 +1,8 @@ -import asyncio from abc import ABC, abstractmethod from datetime import datetime from pathlib import Path, PurePath from typing import Any, Awaitable, Callable, Dict, Optional, Tuple, TypeVar -import aiohttp from rich.markup import escape from .authenticator import Authenticator @@ -15,7 +13,6 @@ from .output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, Ou from .report import MarkConflictError, MarkDuplicateError from .transformer import Transformer from .utils import ReusableAsyncContextManager -from .version import NAME, VERSION class 
CrawlWarning(Exception): @@ -285,68 +282,3 @@ class Crawler(ABC): """ pass - - -class HttpCrawler(Crawler): - COOKIE_FILE = PurePath(".cookies") - - def __init__( - self, - name: str, - section: CrawlerSection, - config: Config, - ) -> None: - super().__init__(name, section, config) - - self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE) - self._output_dir.register_reserved(self.COOKIE_FILE) - self._authentication_id = 0 - self._authentication_lock = asyncio.Lock() - - async def prepare_request(self) -> int: - # We acquire the lock here to ensure we wait for any concurrent authenticate to finish. - # This should reduce the amount of requests we make: If an authentication is in progress - # all future requests wait for authentication to complete. - async with self._authentication_lock: - return self._authentication_id - - async def authenticate(self, current_id: int) -> None: - async with self._authentication_lock: - # Another thread successfully called authenticate in between - # We do not want to perform auth again, so return here. We can - # assume auth suceeded as authenticate will throw an error if - # it failed. - if current_id != self._authentication_id: - return - await self._authenticate() - self._authentication_id += 1 - - async def _authenticate(self) -> None: - """ - Performs authentication. This method must only return normally if authentication suceeded. - In all other cases it mus either retry internally or throw a terminal exception. 
- """ - raise RuntimeError("_authenticate() was called but crawler doesn't provide an implementation") - - async def run(self) -> None: - cookie_jar = aiohttp.CookieJar() - - try: - cookie_jar.load(self._cookie_jar_path) - except Exception: - pass - - async with aiohttp.ClientSession( - headers={"User-Agent": f"{NAME}/{VERSION}"}, - cookie_jar=cookie_jar, - ) as session: - self.session = session - try: - await super().run() - finally: - del self.session - - try: - cookie_jar.save(self._cookie_jar_path) - except Exception: - log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 9db9267..7ffa993 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -11,7 +11,8 @@ from rich.markup import escape from PFERD.authenticators import Authenticator from PFERD.config import Config -from PFERD.crawler import CrawlError, CrawlerSection, CrawlWarning, HttpCrawler, anoncritical +from PFERD.crawler import CrawlError, CrawlerSection, CrawlWarning, anoncritical +from PFERD.http_crawler import HttpCrawler from PFERD.logging import ProgressBar, log from PFERD.output_dir import FileSink, Redownload from PFERD.utils import soupify, url_set_query_param diff --git a/PFERD/http_crawler.py b/PFERD/http_crawler.py new file mode 100644 index 0000000..2b025e3 --- /dev/null +++ b/PFERD/http_crawler.py @@ -0,0 +1,75 @@ +import asyncio +from pathlib import PurePath + +import aiohttp +from rich.markup import escape + +from .config import Config +from .crawler import Crawler, CrawlerSection +from .logging import log +from .version import NAME, VERSION + + +class HttpCrawler(Crawler): + COOKIE_FILE = PurePath(".cookies") + + def __init__( + self, + name: str, + section: CrawlerSection, + config: Config, + ) -> None: + super().__init__(name, section, config) + + self._cookie_jar_path = 
self._output_dir.resolve(self.COOKIE_FILE) + self._output_dir.register_reserved(self.COOKIE_FILE) + self._authentication_id = 0 + self._authentication_lock = asyncio.Lock() + + async def prepare_request(self) -> int: + # We acquire the lock here to ensure we wait for any concurrent authenticate to finish. + # This should reduce the amount of requests we make: If an authentication is in progress + # all future requests wait for authentication to complete. + async with self._authentication_lock: + return self._authentication_id + + async def authenticate(self, current_id: int) -> None: + async with self._authentication_lock: + # Another thread successfully called authenticate in between + # We do not want to perform auth again, so return here. We can + # assume auth suceeded as authenticate will throw an error if + # it failed. + if current_id != self._authentication_id: + return + await self._authenticate() + self._authentication_id += 1 + + async def _authenticate(self) -> None: + """ + Performs authentication. This method must only return normally if authentication suceeded. + In all other cases it mus either retry internally or throw a terminal exception. 
+ """ + raise RuntimeError("_authenticate() was called but crawler doesn't provide an implementation") + + async def run(self) -> None: + cookie_jar = aiohttp.CookieJar() + + try: + cookie_jar.load(self._cookie_jar_path) + except Exception: + pass + + async with aiohttp.ClientSession( + headers={"User-Agent": f"{NAME}/{VERSION}"}, + cookie_jar=cookie_jar, + ) as session: + self.session = session + try: + await super().run() + finally: + del self.session + + try: + cookie_jar.save(self._cookie_jar_path) + except Exception: + log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") From adfdc302d7b0a4f2a24f10afc6f360d9b419e5ff Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 22 May 2021 23:30:32 +0200 Subject: [PATCH 181/524] Save cookies after successful authentication in HTTP crawler --- PFERD/http_crawler.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/PFERD/http_crawler.py b/PFERD/http_crawler.py index 2b025e3..41bf612 100644 --- a/PFERD/http_crawler.py +++ b/PFERD/http_crawler.py @@ -1,5 +1,6 @@ import asyncio from pathlib import PurePath +from typing import Optional import aiohttp from rich.markup import escape @@ -25,6 +26,7 @@ class HttpCrawler(Crawler): self._output_dir.register_reserved(self.COOKIE_FILE) self._authentication_id = 0 self._authentication_lock = asyncio.Lock() + self._current_cookie_jar: Optional[aiohttp.CookieJar] = None async def prepare_request(self) -> int: # We acquire the lock here to ensure we wait for any concurrent authenticate to finish. 
@@ -43,6 +45,9 @@ class HttpCrawler(Crawler): return await self._authenticate() self._authentication_id += 1 + # Saving the cookies after the first auth ensures we won't need to re-authenticate + # on the next run, should this one be aborted or crash + await self._save_cookies() async def _authenticate(self) -> None: """ @@ -51,17 +56,29 @@ class HttpCrawler(Crawler): """ raise RuntimeError("_authenticate() was called but crawler doesn't provide an implementation") - async def run(self) -> None: - cookie_jar = aiohttp.CookieJar() + async def _save_cookies(self) -> None: + log.explain_topic("Saving cookies") + if not self._current_cookie_jar: + log.explain("No cookie jar - save aborted") + return try: - cookie_jar.load(self._cookie_jar_path) + self._current_cookie_jar.save(self._cookie_jar_path) + log.explain(f"Cookies saved to {escape(str(self.COOKIE_FILE))}") + except Exception: + log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") + + async def run(self) -> None: + self._current_cookie_jar = aiohttp.CookieJar() + + try: + self._current_cookie_jar.load(self._cookie_jar_path) except Exception: pass async with aiohttp.ClientSession( headers={"User-Agent": f"{NAME}/{VERSION}"}, - cookie_jar=cookie_jar, + cookie_jar=self._current_cookie_jar, ) as session: self.session = session try: @@ -69,7 +86,5 @@ class HttpCrawler(Crawler): finally: del self.session - try: - cookie_jar.save(self._cookie_jar_path) - except Exception: - log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") + # They are saved in authenticate, but a final save won't hurt + await self._save_cookies() From 8ac85ea0bd6e2efcb6a1cd03fb31fde0a091ea90 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 22 May 2021 23:37:34 +0200 Subject: [PATCH 182/524] Fix a few typos in HttpCrawler --- PFERD/http_crawler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/PFERD/http_crawler.py 
b/PFERD/http_crawler.py index 41bf612..b9cfeea 100644 --- a/PFERD/http_crawler.py +++ b/PFERD/http_crawler.py @@ -37,10 +37,10 @@ class HttpCrawler(Crawler): async def authenticate(self, current_id: int) -> None: async with self._authentication_lock: - # Another thread successfully called authenticate in between - # We do not want to perform auth again, so return here. We can - # assume auth suceeded as authenticate will throw an error if - # it failed. + # Another thread successfully called authenticate in-between + # We do not want to perform auth again, so we return here. We can + # assume the other thread suceeded as authenticate will throw an error + # if it failed and aborts the crawl process. if current_id != self._authentication_id: return await self._authenticate() @@ -52,7 +52,7 @@ class HttpCrawler(Crawler): async def _authenticate(self) -> None: """ Performs authentication. This method must only return normally if authentication suceeded. - In all other cases it mus either retry internally or throw a terminal exception. + In all other cases it must either retry internally or throw a terminal exception. 
""" raise RuntimeError("_authenticate() was called but crawler doesn't provide an implementation") From 53e031d9f6202cad3b14a80f0e26967a93e1d79d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 00:28:27 +0200 Subject: [PATCH 183/524] Reuse dl/cl for I/O retries in ILIAS crawler --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 133 ++++++++++++++---- 1 file changed, 105 insertions(+), 28 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 7ffa993..1bdf5e4 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -72,11 +72,11 @@ AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: def decorator(f: AWrapped) -> AWrapped: - async def wrapper(self: "HttpCrawler", *args: Any, **kwargs: Any) -> None: + async def wrapper(*args: Any, **kwargs: Any) -> None: last_exception: Optional[BaseException] = None for round in range(attempts): try: - await f(self, *args, **kwargs) + await f(*args, **kwargs) return except aiohttp.ContentTypeError: # invalid content type raise CrawlWarning("ILIAS returned an invalid content type") @@ -97,6 +97,43 @@ def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: return decorator +def _wrap_io_in_warning(name: str) -> Callable[[AWrapped], AWrapped]: + """ + Wraps any I/O exception in a CrawlWarning. 
+ """ + return _iorepeat(1, name) + + +# Crawler control flow: +# +# crawl_desktop -+ +# | +# crawl_course --+ +# | +# +- crawl_url <-+ +# | +# | +# | @_wrap_io_exception # does not need to retry as children acquire bars +# +> crawl_ilias_element -+ +# ^ | +# | @_io_repeat | # retries internally (before the bar) +# +- crawl_ilias_page <---+ +# | | +# +> get_page | # Handles and retries authentication +# | +# @_io_repeat | # retries internally (before the bar) +# +- download_link <---+ +# | | +# +> resolve_target | # Handles and retries authentication +# | +# @_io_repeat | # retries internally (before the bar) +# +- download_video <---+ +# | | +# | @_io_repeat | # retries internally (before the bar) +# +- download_file <---+ +# | +# +> stream_from_url # Handles and retries authentication + class KitIliasWebCrawler(HttpCrawler): def __init__( self, @@ -169,18 +206,30 @@ class KitIliasWebCrawler(HttpCrawler): if not cl: return - tasks = [] - async with cl: - soup = await self._get_page(url) - page = IliasPage(soup, url, parent) + @_iorepeat(3, "crawling folder") + async def impl() -> None: + assert cl # The function is only reached when cl is not None + tasks = [] + async with cl: + soup = await self._get_page(url) + page = IliasPage(soup, url, parent) - for child in page.get_child_elements(): - tasks.append(self._handle_ilias_element(path, child)) + for child in page.get_child_elements(): + tasks.append(self._handle_ilias_element(path, child)) - await asyncio.gather(*tasks) + # The only point an I/O exception can be thrown is in `get_page`. + # If that happens, no task was spawned yet. Therefore, we only retry + # this method without having spawned a single task. Due to this we do + # not need to cancel anything or worry about this gather call or the forks + # further up. 
+ await asyncio.gather(*tasks) + + await impl() @anoncritical - @_iorepeat(3, "ILIAS element crawling") + # Shouldn't happen but this method must never raise an I/O error as that might interfere with + # handle_ilias_page + @_wrap_io_in_warning("ilias element handling") async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: element_path = PurePath(parent_path, element.name) @@ -208,19 +257,37 @@ class KitIliasWebCrawler(HttpCrawler): if not dl: return - async with dl as (bar, sink): - export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") - async with self.session.get(export_url) as response: - html_page: BeautifulSoup = soupify(await response.read()) - real_url: str = html_page.select_one("a").get("href").strip() + @_iorepeat(3, "link resolving") + async def impl() -> None: + assert dl # This function is only reached when dl is not None + async with dl as (bar, sink): + export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") + real_url = await self._resolve_link_target(export_url) - content = link_template_plain if self._link_file_use_plaintext else link_template_rich - content = content.replace("{{link}}", real_url) - content = content.replace("{{name}}", element.name) - content = content.replace("{{description}}", str(element.description)) - content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) - sink.file.write(content.encode("utf-8")) - sink.done() + content = link_template_plain if self._link_file_use_plaintext else link_template_rich + content = content.replace("{{link}}", real_url) + content = content.replace("{{name}}", element.name) + content = content.replace("{{description}}", str(element.description)) + content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) + sink.file.write(content.encode("utf-8")) + sink.done() + + await impl() + + async def _resolve_link_target(self, export_url: str) -> str: + async with 
self.session.get(export_url, allow_redirects=False) as resp: + # No redirect means we were authenticated + if hdrs.LOCATION not in resp.headers: + return soupify(await resp.read()).select_one("a").get("href").strip() + + self._authenticate() + + async with self.session.get(export_url, allow_redirects=False) as resp: + # No redirect means we were authenticated + if hdrs.LOCATION not in resp.headers: + return soupify(await resp.read()).select_one("a").get("href").strip() + + raise CrawlError("resolve_link_target failed even after authenticating") async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: # Videos will NOT be redownloaded - their content doesn't really change and they are chunky @@ -228,19 +295,29 @@ class KitIliasWebCrawler(HttpCrawler): if not dl: return - async with dl as (bar, sink): - page = IliasPage(await self._get_page(element.url), element.url, element) - real_element = page.get_child_elements()[0] + @_iorepeat(3, "video download") + async def impl() -> None: + assert dl # The function is only reached when dl is not None + async with dl as (bar, sink): + page = IliasPage(await self._get_page(element.url), element.url, element) + real_element = page.get_child_elements()[0] - await self._stream_from_url(real_element.url, sink, bar) + await self._stream_from_url(real_element.url, sink, bar) + + await impl() async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: dl = await self.download(element_path, mtime=element.mtime) if not dl: return - async with dl as (bar, sink): - await self._stream_from_url(element.url, sink, bar) + @_iorepeat(3, "file download") + async def impl() -> None: + assert dl # The function is only reached when dl is not None + async with dl as (bar, sink): + await self._stream_from_url(element.url, sink, bar) + + await impl() async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: async def try_stream() -> bool: From 
44ecb2fbe77b9c5caa6096d9b4309034b8326ba3 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 10:44:04 +0200 Subject: [PATCH 184/524] Fix cleanup deleting crawler's base directory --- PFERD/output_dir.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index fef6914..02d5fe8 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -385,7 +385,10 @@ class OutputDirectory: self._report.add_file(info.path) async def cleanup(self) -> None: - await self._cleanup_dir(self._root, PurePath()) + if not self._root.exists(): + return + + await self._cleanup_dir(self._root, PurePath(), delete_self=False) async def _cleanup(self, path: Path, pure: PurePath) -> None: if path.is_dir(): @@ -393,15 +396,16 @@ class OutputDirectory: elif path.is_file(): await self._cleanup_file(path, pure) - async def _cleanup_dir(self, path: Path, pure: PurePath) -> None: + async def _cleanup_dir(self, path: Path, pure: PurePath, delete_self: bool = True) -> None: for child in path.iterdir(): pure_child = pure / child.name await self._cleanup(child, pure_child) - try: - path.rmdir() - except OSError: - pass + if delete_self: + try: + path.rmdir() + except OSError: + pass async def _cleanup_file(self, path: Path, pure: PurePath) -> None: if self._report.is_marked(pure): From 6fe51e258f14d478b2585947e18868dd20a60f4c Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 10:44:18 +0200 Subject: [PATCH 185/524] Number rules starting at 1 --- PFERD/transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 7b5745b..2604c43 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -323,7 +323,7 @@ class Transformer: def transform(self, path: PurePath) -> Optional[PurePath]: for i, (line, rule) in enumerate(self._rules): - log.explain(f"Testing rule {i}: {line}") + log.explain(f"Testing rule {i+1}: {line}") result = 
rule.transform(path) if isinstance(result, PurePath): From 729ff0a4c7ab8870326232cfb4cc5ef5533b06fd Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 10:44:59 +0200 Subject: [PATCH 186/524] Fix simple authenticator output --- PFERD/authenticators/simple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/authenticators/simple.py b/PFERD/authenticators/simple.py index caa0002..bcbe69c 100644 --- a/PFERD/authenticators/simple.py +++ b/PFERD/authenticators/simple.py @@ -37,7 +37,7 @@ class SimpleAuthenticator(Authenticator): if self._username is None: self._username = await ainput("Username: ") else: - print(f"Username: {self.username}") + print(f"Username: {self._username}") if self._password is None: self._password = await agetpass("Password: ") From ec3767c545b674d68afd9fe812caaf4b67f192dd Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 10:52:02 +0200 Subject: [PATCH 187/524] Create crawler base dir at start of crawl --- PFERD/crawler.py | 1 + PFERD/output_dir.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 731cfb9..61f1868 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -269,6 +269,7 @@ class Crawler(ABC): """ with log.show_progress(): + self._output_dir.prepare() await self._run() await self._cleanup() diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 02d5fe8..f9a7c99 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -142,6 +142,14 @@ class OutputDirectory: self._report = Report() + def prepare(self) -> None: + log.explain_topic(f"Creating base directory at {str(self._root.absolute())!r}") + + try: + self._root.mkdir(parents=True, exist_ok=True) + except OSError: + raise OutputDirError("Failed to create base directory") + def register_reserved(self, path: PurePath) -> None: self._report.mark_reserved(path) @@ -385,9 +393,6 @@ class OutputDirectory: self._report.add_file(info.path) async def cleanup(self) -> 
None: - if not self._root.exists(): - return - await self._cleanup_dir(self._root, PurePath(), delete_self=False) async def _cleanup(self, path: Path, pure: PurePath) -> None: From c88f20859aa20f32b17e7f728195f31d6146bbfb Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 11:04:50 +0200 Subject: [PATCH 188/524] Explain config file dumping --- PFERD/__main__.py | 2 ++ PFERD/config.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 5cc6ef6..2578487 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -81,6 +81,8 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N def dump_config(args: argparse.Namespace, config: Config) -> None: + log.explain_topic("Dumping config") + try: if args.dump_config is True: config.dump() diff --git a/PFERD/config.py b/PFERD/config.py index 26a9eb6..e68db53 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -5,6 +5,8 @@ from configparser import ConfigParser, SectionProxy from pathlib import Path from typing import Any, List, NoReturn, Optional, Tuple +from rich.markup import escape + from .logging import log from .utils import prompt_yes_no @@ -122,10 +124,14 @@ class Config: May throw a ConfigDumpError. 
""" - if not path: + if path: + log.explain("Using custom path") + else: + log.explain("Using default path") path = self._default_path() - print(f"Dumping config to {path}") + log.explain(f"Dumping to {str(path.absolute())!r}") + log.print(f"[bold bright_cyan]Dumping[/] to {escape(repr(str(path.absolute())))}") try: path.parent.mkdir(parents=True, exist_ok=True) From 803e5628a22d49877ef1be6d8974901b91605c31 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 11:30:16 +0200 Subject: [PATCH 189/524] Clean up logging Paths are now (hopefully) logged consistently across all crawlers --- PFERD/config.py | 12 ++--- PFERD/crawler.py | 10 ++-- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 15 +++--- PFERD/http_crawler.py | 8 +-- PFERD/logging.py | 32 ++++++++++++ PFERD/output_dir.py | 22 ++++----- PFERD/transformer.py | 3 +- PFERD/utils.py | 49 +++++++++++-------- 8 files changed, 95 insertions(+), 56 deletions(-) diff --git a/PFERD/config.py b/PFERD/config.py index e68db53..3c69fc7 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -8,7 +8,7 @@ from typing import Any, List, NoReturn, Optional, Tuple from rich.markup import escape from .logging import log -from .utils import prompt_yes_no +from .utils import fmt_real_path, prompt_yes_no class ConfigLoadError(Exception): @@ -17,7 +17,7 @@ class ConfigLoadError(Exception): """ def __init__(self, path: Path, reason: str): - super().__init__(f"Failed to load config from {path}") + super().__init__(f"Failed to load config from {fmt_real_path(path)}") self.path = path self.reason = reason @@ -36,7 +36,7 @@ class ConfigOptionError(Exception): class ConfigDumpError(Exception): def __init__(self, path: Path, reason: str): - super().__init__(f"Failed to dump config to {path}") + super().__init__(f"Failed to dump config to {fmt_real_path(path)}") self.path = path self.reason = reason @@ -105,7 +105,7 @@ class Config: else: log.explain("Using default path") path = Config._default_path() - log.explain(f"Loading 
{str(path)!r}") + log.explain(f"Loading {fmt_real_path(path)}") # Using config.read_file instead of config.read because config.read # would just ignore a missing file and carry on. @@ -130,8 +130,8 @@ class Config: log.explain("Using default path") path = self._default_path() - log.explain(f"Dumping to {str(path.absolute())!r}") - log.print(f"[bold bright_cyan]Dumping[/] to {escape(repr(str(path.absolute())))}") + log.explain(f"Dumping to {fmt_real_path(path)}") + log.print(f"[bold bright_cyan]Dumping[/] to {escape(fmt_real_path(path))}") try: path.parent.mkdir(parents=True, exist_ok=True) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 61f1868..53640e3 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -12,7 +12,7 @@ from .logging import ProgressBar, log from .output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload from .report import MarkConflictError, MarkDuplicateError from .transformer import Transformer -from .utils import ReusableAsyncContextManager +from .utils import ReusableAsyncContextManager, fmt_path class CrawlWarning(Exception): @@ -217,7 +217,7 @@ class Crawler(ABC): ) async def crawl(self, path: PurePath) -> Optional[CrawlToken]: - log.explain_topic(f"Decision: Crawl {path}") + log.explain_topic(f"Decision: Crawl {fmt_path(path)}") if self._transformer.transform(path) is None: log.explain("Answer: No") @@ -225,7 +225,7 @@ class Crawler(ABC): log.explain("Answer: Yes") - desc = f"[bold bright_cyan]Crawling[/] {escape(str(path))}" + desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(path))}" return CrawlToken(self._limiter, desc) async def download( @@ -235,7 +235,7 @@ class Crawler(ABC): redownload: Optional[Redownload] = None, on_conflict: Optional[OnConflict] = None, ) -> Optional[DownloadToken]: - log.explain_topic(f"Decision: Download {path}") + log.explain_topic(f"Decision: Download {fmt_path(path)}") transformed_path = self._transformer.transform(path) if transformed_path is None: @@ 
-249,7 +249,7 @@ class Crawler(ABC): log.explain("Answer: Yes") - desc = f"[bold bright_cyan]Downloading[/] {escape(str(path))}" + desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(path))}" return DownloadToken(self._limiter, fs_token, desc) async def _cleanup(self) -> None: diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 1bdf5e4..424b4ba 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -1,13 +1,11 @@ import asyncio import re from pathlib import PurePath -# TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import Any, Awaitable, Callable, Dict, Optional, Set, TypeVar, Union import aiohttp from aiohttp import hdrs from bs4 import BeautifulSoup, Tag -from rich.markup import escape from PFERD.authenticators import Authenticator from PFERD.config import Config @@ -17,6 +15,7 @@ from PFERD.logging import ProgressBar, log from PFERD.output_dir import FileSink, Redownload from PFERD.utils import soupify, url_set_query_param +from ...utils import fmt_path from .file_templates import link_template_plain, link_template_rich from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement @@ -86,10 +85,10 @@ def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: last_exception = e except aiohttp.ClientConnectionError as e: # e.g. timeout, disconnect, resolve failed, etc. last_exception = e - log.explain_topic(f"Retrying operation {escape(name)}. Retries left: {attempts - 1 - round}") + log.explain_topic(f"Retrying operation {name}. 
Retries left: {attempts - 1 - round}") if last_exception: - message = f"Error in I/O Operation: {escape(str(last_exception))}" + message = f"Error in I/O Operation: {last_exception}" raise CrawlWarning(message) from last_exception raise CrawlError("Impossible return in ilias _iorepeat") @@ -162,7 +161,7 @@ class KitIliasWebCrawler(HttpCrawler): log.explain_topic("Inferred crawl target: Personal desktop") await self._crawl_desktop() else: - log.explain_topic(f"Inferred crawl target: URL {escape(self._target)}") + log.explain_topic(f"Inferred crawl target: URL {self._target}") await self._crawl_url(self._target) async def _crawl_course(self, course_id: int) -> None: @@ -190,9 +189,7 @@ class KitIliasWebCrawler(HttpCrawler): if expected_id is not None: perma_link_element: Tag = soup.find(id="current_perma_link") if not perma_link_element or "crs_" not in perma_link_element.get("value"): - raise CrawlError( - "Invalid course id? I didn't find anything looking like a course" - ) + raise CrawlError("Invalid course id? Didn't find anything looking like a course") # Duplicated code, but the root page is special - we want to void fetching it twice! 
page = IliasPage(soup, url, None) @@ -236,7 +233,7 @@ class KitIliasWebCrawler(HttpCrawler): if element.type == IliasElementType.FILE: await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: - log.explain_topic(f"Decision: Crawl {escape(str(element_path))}") + log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Forums are not supported") log.explain("Answer: No") elif element.type == IliasElementType.LINK: diff --git a/PFERD/http_crawler.py b/PFERD/http_crawler.py index b9cfeea..15e9ff1 100644 --- a/PFERD/http_crawler.py +++ b/PFERD/http_crawler.py @@ -3,11 +3,11 @@ from pathlib import PurePath from typing import Optional import aiohttp -from rich.markup import escape from .config import Config from .crawler import Crawler, CrawlerSection from .logging import log +from .utils import fmt_real_path from .version import NAME, VERSION @@ -59,14 +59,14 @@ class HttpCrawler(Crawler): async def _save_cookies(self) -> None: log.explain_topic("Saving cookies") if not self._current_cookie_jar: - log.explain("No cookie jar - save aborted") + log.explain("No cookie jar, save aborted") return try: self._current_cookie_jar.save(self._cookie_jar_path) - log.explain(f"Cookies saved to {escape(str(self.COOKIE_FILE))}") + log.explain(f"Cookies saved to {fmt_real_path(self._cookie_jar_path)}") except Exception: - log.print(f"[bold red]Warning:[/] Failed to save cookies to {escape(str(self.COOKIE_FILE))}") + log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}") async def run(self) -> None: self._current_cookie_jar = aiohttp.CookieJar() diff --git a/PFERD/logging.py b/PFERD/logging.py index beb92c6..9eb2c7c 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -112,6 +112,10 @@ class Log: self.print(line) def print(self, text: str) -> None: + """ + Print a normal message. Allows markup. 
+ """ + if self._progress_suspended: self._lines.append(text) else: @@ -120,12 +124,24 @@ class Log: # TODO Print errors (and warnings?) to stderr def warn(self, text: str) -> None: + """ + Print a warning message. Allows no markup. + """ + self.print(f"[bold bright_red]Warning[/] {escape(text)}") def error(self, text: str) -> None: + """ + Print an error message. Allows no markup. + """ + self.print(f"[bold bright_red]Error[/] [red]{escape(text)}") def error_contd(self, text: str) -> None: + """ + Print further lines of an error message. Allows no markup. + """ + self.print(f"[red]{escape(text)}") def unexpected_exception(self) -> None: @@ -153,18 +169,34 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new """.strip()) def explain_topic(self, text: str) -> None: + """ + Print a top-level explain text. Allows no markup. + """ + if self.output_explain: self.print(f"[cyan]{escape(text)}") def explain(self, text: str) -> None: + """ + Print an indented explain text. Allows no markup. + """ + if self.output_explain: self.print(f" {escape(text)}") def action(self, text: str) -> None: + """ + Print a status update while crawling. Allows markup. + """ + if self.output_action: self.print(text) def report(self, text: str) -> None: + """ + Print a report after crawling. Allows markup. 
+ """ + if self.output_report: self.print(text) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index f9a7c99..1f83de6 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -14,7 +14,7 @@ from rich.markup import escape from .logging import log from .report import Report -from .utils import ReusableAsyncContextManager, prompt_yes_no +from .utils import ReusableAsyncContextManager, fmt_path, fmt_real_path, prompt_yes_no SUFFIX_CHARS = string.ascii_lowercase + string.digits SUFFIX_LENGTH = 6 @@ -143,7 +143,7 @@ class OutputDirectory: self._report = Report() def prepare(self) -> None: - log.explain_topic(f"Creating base directory at {str(self._root.absolute())!r}") + log.explain_topic(f"Creating base directory at {fmt_real_path(self._root)}") try: self._root.mkdir(parents=True, exist_ok=True) @@ -159,9 +159,9 @@ class OutputDirectory: """ if ".." in path.parts: - raise OutputDirError(f"Forbidden segment '..' in path {path}") + raise OutputDirError(f"Forbidden segment '..' in path {fmt_path(path)}") if "." in path.parts: - raise OutputDirError(f"Forbidden segment '.' in path {path}") + raise OutputDirError(f"Forbidden segment '.' in path {fmt_path(path)}") return self._root / path def _should_download( @@ -213,7 +213,7 @@ class OutputDirectory: ) -> bool: if on_conflict == OnConflict.PROMPT: async with log.exclusive_output(): - prompt = f"Replace {path} with remote file?" + prompt = f"Replace {fmt_path(path)} with remote file?" return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: return False @@ -232,7 +232,7 @@ class OutputDirectory: ) -> bool: if on_conflict == OnConflict.PROMPT: async with log.exclusive_output(): - prompt = f"Recursively delete {path} and replace with remote file?" + prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?" 
return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: return False @@ -252,7 +252,7 @@ class OutputDirectory: ) -> bool: if on_conflict == OnConflict.PROMPT: async with log.exclusive_output(): - prompt = f"Delete {parent} so remote file {path} can be downloaded?" + prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?" return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: return False @@ -271,7 +271,7 @@ class OutputDirectory: ) -> bool: if on_conflict == OnConflict.PROMPT: async with log.exclusive_output(): - prompt = f"Delete {path}?" + prompt = f"Delete {fmt_path(path)}?" return await prompt_yes_no(prompt, default=False) elif on_conflict == OnConflict.LOCAL_FIRST: return False @@ -386,10 +386,10 @@ class OutputDirectory: self._update_metadata(info) if changed: - log.action(f"[bold bright_yellow]Changed[/] {escape(str(info.path))}") + log.action(f"[bold bright_yellow]Changed[/] {escape(fmt_path(info.path))}") self._report.change_file(info.path) else: - log.action(f"[bold bright_green]Added[/] {escape(str(info.path))}") + log.action(f"[bold bright_green]Added[/] {escape(fmt_path(info.path))}") self._report.add_file(info.path) async def cleanup(self) -> None: @@ -419,7 +419,7 @@ class OutputDirectory: if await self._conflict_delete_lf(self._on_conflict, pure): try: path.unlink() - log.action(f"[bold bright_magenta]Deleted[/] {escape(str(path))}") + log.action(f"[bold bright_magenta]Deleted[/] {escape(fmt_path(path))}") self._report.delete_file(pure) except OSError: pass diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 2604c43..9670d0e 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -10,6 +10,7 @@ from pathlib import PurePath from typing import Dict, Optional, Union from .logging import log +from .utils import fmt_path class Rule(ABC): @@ -327,7 +328,7 @@ class Transformer: result = rule.transform(path) if isinstance(result, 
PurePath): - log.explain(f"Match! Transformed to {result}") + log.explain(f"Match! Transformed to {fmt_path(result)}") return result elif result: # Exclamation mark log.explain("Match! Ignored") diff --git a/PFERD/utils.py b/PFERD/utils.py index 1d11565..397feda 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -4,6 +4,7 @@ import sys import threading from abc import ABC, abstractmethod from contextlib import AsyncExitStack +from pathlib import Path, PurePath from types import TracebackType from typing import Any, Callable, Dict, Generic, Optional, Type, TypeVar from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit @@ -34,6 +35,30 @@ async def agetpass(prompt: str) -> str: return await in_daemon_thread(lambda: getpass.getpass(prompt)) +async def prompt_yes_no(query: str, default: Optional[bool]) -> bool: + """ + Asks the user a yes/no question and returns their choice. + """ + + if default is True: + query += " [Y/n] " + elif default is False: + query += " [y/N] " + else: + query += " [y/n] " + + while True: + response = (await ainput(query)).strip().lower() + if response == "y": + return True + elif response == "n": + return False + elif response == "" and default is not None: + return default + + print("Please answer with 'y' or 'n'.") + + def soupify(data: bytes) -> bs4.BeautifulSoup: """ Parses HTML to a beautifulsoup object. @@ -66,28 +91,12 @@ def url_set_query_params(url: str, params: Dict[str, str]) -> str: return result -async def prompt_yes_no(query: str, default: Optional[bool]) -> bool: - """ - Asks the user a yes/no question and returns their choice. 
- """ +def fmt_path(path: PurePath) -> str: + return repr(str(path)) - if default is True: - query += " [Y/n] " - elif default is False: - query += " [y/N] " - else: - query += " [y/n] " - while True: - response = (await ainput(query)).strip().lower() - if response == "y": - return True - elif response == "n": - return False - elif response == "" and default is not None: - return default - - print("Please answer with 'y' or 'n'.") +def fmt_real_path(path: Path) -> str: + return repr(str(path.absolute())) class ReusableAsyncContextManager(ABC, Generic[T]): From 25e2abdb033f1029bb6a841cb1d45958313907f3 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 11:45:14 +0200 Subject: [PATCH 190/524] Improve transformer explain wording --- PFERD/transformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 9670d0e..5a20207 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -328,10 +328,10 @@ class Transformer: result = rule.transform(path) if isinstance(result, PurePath): - log.explain(f"Match! Transformed to {fmt_path(result)}") + log.explain(f"Match found, transformed path to {fmt_path(result)}") return result elif result: # Exclamation mark - log.explain("Match! 
Ignored") + log.explain("Match found, path ignored") return None else: continue From 33a81a5f5c6cfa8756989cd682a9745df5ce4978 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 11:55:34 +0200 Subject: [PATCH 191/524] Document authentication in HTTP crawler and rename prepare_request --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 4 ++-- PFERD/http_crawler.py | 21 ++++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 424b4ba..cde8654 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -333,7 +333,7 @@ class KitIliasWebCrawler(HttpCrawler): sink.done() return True - auth_id = await self.prepare_request() + auth_id = await self._current_auth_id() if await try_stream(): return @@ -343,7 +343,7 @@ class KitIliasWebCrawler(HttpCrawler): raise CrawlError("File streaming failed after authenticate()") async def _get_page(self, url: str) -> BeautifulSoup: - auth_id = await self.prepare_request() + auth_id = await self._current_auth_id() async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): diff --git a/PFERD/http_crawler.py b/PFERD/http_crawler.py index 15e9ff1..adbac5d 100644 --- a/PFERD/http_crawler.py +++ b/PFERD/http_crawler.py @@ -28,20 +28,35 @@ class HttpCrawler(Crawler): self._authentication_lock = asyncio.Lock() self._current_cookie_jar: Optional[aiohttp.CookieJar] = None - async def prepare_request(self) -> int: + async def _current_auth_id(self) -> int: + """ + Returns the id for the current authentication, i.e. an identifier for the last + successful call to [authenticate]. + + This method must be called before any request that might authenticate is made, so the + HttpCrawler can properly track when [authenticate] can return early and when actual + authentication is necessary. 
+ """ # We acquire the lock here to ensure we wait for any concurrent authenticate to finish. # This should reduce the amount of requests we make: If an authentication is in progress # all future requests wait for authentication to complete. async with self._authentication_lock: return self._authentication_id - async def authenticate(self, current_id: int) -> None: + async def authenticate(self, caller_auth_id: int) -> None: + """ + Starts the authentication process. The main work is offloaded to _authenticate, which + you should overwrite in a subclass if needed. This method should *NOT* be overwritten. + + The [caller_auth_id] should be the result of a [_current_auth_id] call made *before* + the request was made. This ensures that authentication is not performed needlessly. + """ async with self._authentication_lock: # Another thread successfully called authenticate in-between # We do not want to perform auth again, so we return here. We can # assume the other thread suceeded as authenticate will throw an error # if it failed and aborts the crawl process. 
- if current_id != self._authentication_id: + if caller_auth_id != self._authentication_id: return await self._authenticate() self._authentication_id += 1 From e81005ae4bccda785c19e7dd869e480ee487a5ee Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 11:57:59 +0200 Subject: [PATCH 192/524] Fix CLI arguments --- PFERD/cli/__init__.py | 131 ++--------------------- PFERD/cli/{local.py => command_local.py} | 2 +- PFERD/cli/parser.py | 125 +++++++++++++++++++++ 3 files changed, 134 insertions(+), 124 deletions(-) rename PFERD/cli/{local.py => command_local.py} (96%) create mode 100644 PFERD/cli/parser.py diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py index 71d9732..2a9f124 100644 --- a/PFERD/cli/__init__.py +++ b/PFERD/cli/__init__.py @@ -1,125 +1,10 @@ -import argparse -import configparser -from pathlib import Path +# isort: skip_file -from ..output_dir import OnConflict, Redownload +# The order of imports matters because each command module registers itself +# with the parser from ".parser". Because of this, isort is disabled for this +# file. Also, since we're reexporting or just using the side effect of +# importing itself, we get a few linting warnings, which we're disabling as +# well. -CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) -CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( - title="general crawler arguments", - description="arguments common to all crawlers", -) -CRAWLER_PARSER_GROUP.add_argument( - "--redownload", - type=Redownload.from_string, - metavar="OPTION", - help="when to redownload a file that's already present locally" -) -CRAWLER_PARSER_GROUP.add_argument( - "--on-conflict", - type=OnConflict.from_string, - metavar="OPTION", - help="what to do when local and remote files or directories differ" -) -CRAWLER_PARSER_GROUP.add_argument( - "--transform", "-t", - action="append", - type=str, - metavar="RULE", - help="add a single transformation rule. 
Can be specified multiple times" -) -CRAWLER_PARSER_GROUP.add_argument( - "--max-concurrent-tasks", - type=int, - metavar="N", - help="maximum number of concurrent tasks (crawling, downloading)" -) -CRAWLER_PARSER_GROUP.add_argument( - "--max-concurrent-downloads", - type=int, - metavar="N", - help="maximum number of tasks that may download data at the same time" -) -CRAWLER_PARSER_GROUP.add_argument( - "--delay-between-tasks", - type=float, - metavar="SECONDS", - help="time the crawler should wait between subsequent tasks" -) - - -def load_crawler( - args: argparse.Namespace, - section: configparser.SectionProxy, -) -> None: - if args.redownload is not None: - section["redownload"] = args.redownload.value - if args.on_conflict is not None: - section["on_conflict"] = args.on_conflict.value - if args.transform is not None: - section["transform"] = "\n" + "\n".join(args.transform) - if args.max_concurrent_tasks is not None: - section["max_concurrent_tasks"] = str(args.max_concurrent_tasks) - if args.max_concurrent_downloads is not None: - section["max_concurrent_downloads"] = str(args.max_concurrent_downloads) - if args.delay_between_tasks is not None: - section["delay_between_tasks"] = str(args.delay_between_tasks) - - -PARSER = argparse.ArgumentParser() -PARSER.set_defaults(command=None) -PARSER.add_argument( - "--version", - action="store_true", - help="print version and exit" -) -PARSER.add_argument( - "--config", "-c", - type=Path, - metavar="PATH", - help="custom config file" -) -PARSER.add_argument( - "--dump-config", - nargs="?", - const=True, - metavar="PATH", - help="dump current configuration to a file and exit." - " Uses default config file path if no path is specified" -) -PARSER.add_argument( - "--crawler", - action="append", - type=str, - metavar="NAME", - help="only execute a single crawler." 
- " Can be specified multiple times to execute multiple crawlers" -) -PARSER.add_argument( - "--working-dir", - type=Path, - metavar="PATH", - help="custom working directory" -) -PARSER.add_argument( - "--explain", "-e", - # TODO Use argparse.BooleanOptionalAction after updating to 3.9 - action="store_const", - const=True, - help="log and explain in detail what PFERD is doing" -) - - -def load_default_section( - args: argparse.Namespace, - parser: configparser.ConfigParser, -) -> None: - section = parser[parser.default_section] - - if args.working_dir is not None: - section["working_dir"] = str(args.working_dir) - if args.explain is not None: - section["explain"] = "true" if args.explain else "false" - - -SUBPARSERS = PARSER.add_subparsers(title="crawlers") +from . import command_local # noqa: F401 imported but unused +from .parser import PARSER, load_default_section # noqa: F401 imported but unused diff --git a/PFERD/cli/local.py b/PFERD/cli/command_local.py similarity index 96% rename from PFERD/cli/local.py rename to PFERD/cli/command_local.py index 5df81db..73f9d43 100644 --- a/PFERD/cli/local.py +++ b/PFERD/cli/command_local.py @@ -2,7 +2,7 @@ import argparse import configparser from pathlib import Path -from . 
import CRAWLER_PARSER, SUBPARSERS, load_crawler +from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler SUBPARSER = SUBPARSERS.add_parser( "local", diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py new file mode 100644 index 0000000..71d9732 --- /dev/null +++ b/PFERD/cli/parser.py @@ -0,0 +1,125 @@ +import argparse +import configparser +from pathlib import Path + +from ..output_dir import OnConflict, Redownload + +CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) +CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( + title="general crawler arguments", + description="arguments common to all crawlers", +) +CRAWLER_PARSER_GROUP.add_argument( + "--redownload", + type=Redownload.from_string, + metavar="OPTION", + help="when to redownload a file that's already present locally" +) +CRAWLER_PARSER_GROUP.add_argument( + "--on-conflict", + type=OnConflict.from_string, + metavar="OPTION", + help="what to do when local and remote files or directories differ" +) +CRAWLER_PARSER_GROUP.add_argument( + "--transform", "-t", + action="append", + type=str, + metavar="RULE", + help="add a single transformation rule. 
Can be specified multiple times" +) +CRAWLER_PARSER_GROUP.add_argument( + "--max-concurrent-tasks", + type=int, + metavar="N", + help="maximum number of concurrent tasks (crawling, downloading)" +) +CRAWLER_PARSER_GROUP.add_argument( + "--max-concurrent-downloads", + type=int, + metavar="N", + help="maximum number of tasks that may download data at the same time" +) +CRAWLER_PARSER_GROUP.add_argument( + "--delay-between-tasks", + type=float, + metavar="SECONDS", + help="time the crawler should wait between subsequent tasks" +) + + +def load_crawler( + args: argparse.Namespace, + section: configparser.SectionProxy, +) -> None: + if args.redownload is not None: + section["redownload"] = args.redownload.value + if args.on_conflict is not None: + section["on_conflict"] = args.on_conflict.value + if args.transform is not None: + section["transform"] = "\n" + "\n".join(args.transform) + if args.max_concurrent_tasks is not None: + section["max_concurrent_tasks"] = str(args.max_concurrent_tasks) + if args.max_concurrent_downloads is not None: + section["max_concurrent_downloads"] = str(args.max_concurrent_downloads) + if args.delay_between_tasks is not None: + section["delay_between_tasks"] = str(args.delay_between_tasks) + + +PARSER = argparse.ArgumentParser() +PARSER.set_defaults(command=None) +PARSER.add_argument( + "--version", + action="store_true", + help="print version and exit" +) +PARSER.add_argument( + "--config", "-c", + type=Path, + metavar="PATH", + help="custom config file" +) +PARSER.add_argument( + "--dump-config", + nargs="?", + const=True, + metavar="PATH", + help="dump current configuration to a file and exit." + " Uses default config file path if no path is specified" +) +PARSER.add_argument( + "--crawler", + action="append", + type=str, + metavar="NAME", + help="only execute a single crawler." 
+ " Can be specified multiple times to execute multiple crawlers" +) +PARSER.add_argument( + "--working-dir", + type=Path, + metavar="PATH", + help="custom working directory" +) +PARSER.add_argument( + "--explain", "-e", + # TODO Use argparse.BooleanOptionalAction after updating to 3.9 + action="store_const", + const=True, + help="log and explain in detail what PFERD is doing" +) + + +def load_default_section( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + section = parser[parser.default_section] + + if args.working_dir is not None: + section["working_dir"] = str(args.working_dir) + if args.explain is not None: + section["explain"] = "true" if args.explain else "false" + + +SUBPARSERS = PARSER.add_subparsers(title="crawlers") From 3d4b997d4a58bdb728a4f653ee0b0ffec2d662ff Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 12:24:10 +0200 Subject: [PATCH 193/524] Retry crawl_url and work around Python's closure handling Closures capture the scope and not the variables. Therefore, any type-narrowing performed by mypy on captured variables is lost inside the closure. 
--- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 72 +++++++++++-------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index cde8654..2f27683 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -109,6 +109,7 @@ def _wrap_io_in_warning(name: str) -> Callable[[AWrapped], AWrapped]: # | # crawl_course --+ # | +# @_io_repeat | # retries internally (before the bar) # +- crawl_url <-+ # | # | @@ -176,36 +177,45 @@ class KitIliasWebCrawler(HttpCrawler): await self._crawl_url(self._base_url) async def _crawl_url(self, url: str, expected_id: Optional[int] = None) -> None: - cl = await self.crawl(PurePath(".")) - if not cl: + maybe_cl = await self.crawl(PurePath(".")) + if not maybe_cl: return + cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 - tasks = [] + @_iorepeat(3, "crawling url") + async def impl() -> None: + tasks = [] - # TODO: Retry this when the crawl and download bar are reworked - async with cl: - soup = await self._get_page(url) + async with cl: + soup = await self._get_page(url) - if expected_id is not None: - perma_link_element: Tag = soup.find(id="current_perma_link") - if not perma_link_element or "crs_" not in perma_link_element.get("value"): - raise CrawlError("Invalid course id? Didn't find anything looking like a course") + if expected_id is not None: + perma_link_element: Tag = soup.find(id="current_perma_link") + if not perma_link_element or "crs_" not in perma_link_element.get("value"): + raise CrawlError("Invalid course id? Didn't find anything looking like a course") - # Duplicated code, but the root page is special - we want to void fetching it twice! 
- page = IliasPage(soup, url, None) - for child in page.get_child_elements(): - tasks.append(self._handle_ilias_element(PurePath("."), child)) + # Duplicated code, but the root page is special - we want to void fetching it twice! + page = IliasPage(soup, url, None) + for child in page.get_child_elements(): + tasks.append(self._handle_ilias_element(PurePath("."), child)) - await asyncio.gather(*tasks) + # The only point an I/O exception can be thrown is in `get_page`. + # If that happens, no task was spawned yet. Therefore, we only retry + # this method without having spawned a single task. Due to this we do + # not need to cancel anything or worry about this gather call or the forks + # further up. + await asyncio.gather(*tasks) + + await impl() async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: - cl = await self.crawl(path) - if not cl: + maybe_cl = await self.crawl(path) + if not maybe_cl: return + cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 @_iorepeat(3, "crawling folder") async def impl() -> None: - assert cl # The function is only reached when cl is not None tasks = [] async with cl: soup = await self._get_page(url) @@ -225,8 +235,8 @@ class KitIliasWebCrawler(HttpCrawler): @anoncritical # Shouldn't happen but this method must never raise an I/O error as that might interfere with - # handle_ilias_page - @_wrap_io_in_warning("ilias element handling") + # handle_ilias_page or crawl_url + @_wrap_io_in_warning("handling ilias element") async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: element_path = PurePath(parent_path, element.name) @@ -250,13 +260,13 @@ class KitIliasWebCrawler(HttpCrawler): raise CrawlWarning(f"Unknown element type: {element.type!r}") async def _download_link(self, element: IliasPageElement, element_path: PurePath) -> None: - dl = await self.download(element_path, mtime=element.mtime) - if not dl: + 
maybe_dl = await self.download(element_path, mtime=element.mtime) + if not maybe_dl: return + dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 - @_iorepeat(3, "link resolving") + @_iorepeat(3, "resolving link") async def impl() -> None: - assert dl # This function is only reached when dl is not None async with dl as (bar, sink): export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") real_url = await self._resolve_link_target(export_url) @@ -288,11 +298,12 @@ class KitIliasWebCrawler(HttpCrawler): async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: # Videos will NOT be redownloaded - their content doesn't really change and they are chunky - dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) - if not dl: + maybe_dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) + if not maybe_dl: return + dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 - @_iorepeat(3, "video download") + @_iorepeat(3, "downloading video") async def impl() -> None: assert dl # The function is only reached when dl is not None async with dl as (bar, sink): @@ -304,11 +315,12 @@ class KitIliasWebCrawler(HttpCrawler): await impl() async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: - dl = await self.download(element_path, mtime=element.mtime) - if not dl: + maybe_dl = await self.download(element_path, mtime=element.mtime) + if not maybe_dl: return + dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 - @_iorepeat(3, "file download") + @_iorepeat(3, "downloading file") async def impl() -> None: assert dl # The function is only reached when dl is not None async with dl as (bar, sink): From ecdedfa1cfa2bfac01a4b1f96046eaec146eb9fe Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 
12:36:09 +0200 Subject: [PATCH 194/524] Add no-videos flag to ILIAS crawler --- CONFIG.md | 1 + PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/CONFIG.md b/CONFIG.md index 29fc7e2..e92858f 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -136,6 +136,7 @@ This crawler crawls the KIT ILIAS instance. It performs remote calls to a poor S - `link_file_plain_text`: If this is set to true, PFERD will generate plain-text files containing only the link target for external links. If this is false or not specified, PFERD will generate a neat, pretty and functional HTML page instead. +- `no-videos`: If this is set to true, PFERD will not crawl or download any videos. ## Authenticator types ### The `simple` authenticator diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 2f27683..f69d769 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -57,6 +57,9 @@ class KitIliasWebCrawlerSection(CrawlerSection): def link_file_use_plaintext(self) -> bool: return self.s.getboolean("link_file_plain_text", fallback=False) + def no_videos(self) -> bool: + return self.s.getboolean("no-videos", fallback=True) + _DIRECTORY_PAGES: Set[IliasElementType] = set([ IliasElementType.EXERCISE, @@ -66,6 +69,13 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([ IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, ]) +_VIDEO_ELEMENTS: Set[IliasElementType] = set([ + IliasElementType.VIDEO, + IliasElementType.VIDEO_PLAYER, + IliasElementType.VIDEO_FOLDER, + IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, +]) + AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) @@ -153,6 +163,7 @@ class KitIliasWebCrawler(HttpCrawler): self._target = section.target() self._link_file_redirect_delay = section.link_file_redirect_delay() self._link_file_use_plaintext = section.link_file_use_plaintext() + self._no_videos = 
section.no_videos() async def _run(self) -> None: if isinstance(self._target, int): @@ -240,6 +251,16 @@ class KitIliasWebCrawler(HttpCrawler): async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: element_path = PurePath(parent_path, element.name) + if element.type in _VIDEO_ELEMENTS: + log.explain_topic(f"Decision: Crawl video element {fmt_path(element_path)}") + if self._no_videos: + log.explain("Video crawling is disabled") + log.explain("Answer: no") + return + else: + log.explain("Video crawling is enabled") + log.explain("Answer: yes") + if element.type == IliasElementType.FILE: await self._download_file(element, element_path) elif element.type == IliasElementType.FORUM: From 7e0bb0625924ea27fba57687b40e70d32d335c21 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 12:47:30 +0200 Subject: [PATCH 195/524] Clean up TODOs --- PFERD/crawlers/ilias/kit_ilias_html.py | 1 - PFERD/logging.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_html.py b/PFERD/crawlers/ilias/kit_ilias_html.py index 17eb855..eed0884 100644 --- a/PFERD/crawlers/ilias/kit_ilias_html.py +++ b/PFERD/crawlers/ilias/kit_ilias_html.py @@ -3,7 +3,6 @@ import re from dataclasses import dataclass from datetime import date, datetime, timedelta from enum import Enum -# TODO In Python 3.9 and above, AsyncContextManager is deprecated from typing import List, Optional, Union from urllib.parse import urljoin, urlparse diff --git a/PFERD/logging.py b/PFERD/logging.py index 9eb2c7c..ef6ee4c 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -2,7 +2,7 @@ import asyncio import sys import traceback from contextlib import asynccontextmanager, contextmanager -# TODO In Python 3.9 and above, ContextManager and AsyncContextManager are deprecated +# TODO In Python 3.9 and above, ContextManager is deprecated from typing import AsyncIterator, ContextManager, Iterator, List, Optional from rich.console import 
Console, RenderGroup From b44b49476d36ef93bcaf4ab836fba30b0e56e3e6 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 13:23:28 +0200 Subject: [PATCH 196/524] Fix noncritical and anoncritical decorators I must've forgot to update the anoncritical decorator when I last changed the noncritical decorator. Also, every exception should make the crawler not error_free, not just CrawlErrors. --- PFERD/crawler.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 53640e3..c1184c0 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -28,8 +28,11 @@ Wrapped = TypeVar("Wrapped", bound=Callable[..., None]) def noncritical(f: Wrapped) -> Wrapped: """ - Catches all exceptions occuring during the function call. If an exception - occurs, the crawler's error_free variable is set to False. + Catches and logs a few noncritical exceptions occurring during the function + call, mainly CrawlWarning. + + If any exception occurs during the function call, the crawler's error_free + variable is set to False. This includes noncritical exceptions. Warning: Must only be applied to member functions of the Crawler class! """ @@ -45,7 +48,7 @@ def noncritical(f: Wrapped) -> Wrapped: except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: log.warn(str(e)) crawler.error_free = False - except CrawlError: + except: # noqa: E722 do not use bare 'except' crawler.error_free = False raise @@ -59,8 +62,11 @@ def anoncritical(f: AWrapped) -> AWrapped: """ An async version of @noncritical. - Catches all exceptions occuring during the function call. If an exception - occurs, the crawler's error_free variable is set to False. + Catches and logs a few noncritical exceptions occurring during the function + call, mainly CrawlWarning. + + If any exception occurs during the function call, the crawler's error_free + variable is set to False. This includes noncritical exceptions. 
Warning: Must only be applied to member functions of the Crawler class! """ @@ -73,11 +79,10 @@ def anoncritical(f: AWrapped) -> AWrapped: try: await f(*args, **kwargs) - except CrawlWarning as e: - log.print(f"[bold bright_red]Warning[/] {escape(str(e))}") + except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: + log.warn(str(e)) crawler.error_free = False - except CrawlError as e: - log.print(f"[bold bright_red]Error[/] [red]{escape(str(e))}") + except: # noqa: E722 do not use bare 'except' crawler.error_free = False raise From e1d18708b3cf6a93e0b1f2ea1f94a657656e6db2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 13:26:23 +0200 Subject: [PATCH 197/524] Rename "no_videos" to videos --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index f69d769..9094a7b 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -55,10 +55,10 @@ class KitIliasWebCrawlerSection(CrawlerSection): return self.s.getint("link_file_redirect_delay", fallback=-1) def link_file_use_plaintext(self) -> bool: - return self.s.getboolean("link_file_plain_text", fallback=False) + return self.s.getboolean("link_file_plaintext", fallback=False) - def no_videos(self) -> bool: - return self.s.getboolean("no-videos", fallback=True) + def videos(self) -> bool: + return self.s.getboolean("videos", fallback=False) _DIRECTORY_PAGES: Set[IliasElementType] = set([ @@ -163,7 +163,7 @@ class KitIliasWebCrawler(HttpCrawler): self._target = section.target() self._link_file_redirect_delay = section.link_file_redirect_delay() self._link_file_use_plaintext = section.link_file_use_plaintext() - self._no_videos = section.no_videos() + self._videos = section.videos() async def _run(self) -> None: if isinstance(self._target, int): @@ -253,7 +253,7 
@@ class KitIliasWebCrawler(HttpCrawler): if element.type in _VIDEO_ELEMENTS: log.explain_topic(f"Decision: Crawl video element {fmt_path(element_path)}") - if self._no_videos: + if not self._videos: log.explain("Video crawling is disabled") log.explain("Answer: no") return From d8f26a789ed79b680f0a76d516cd9babdcc22c0c Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 13:26:40 +0200 Subject: [PATCH 198/524] Implement CLI Command for ilias crawler --- PFERD/cli/__init__.py | 1 + PFERD/cli/command_kit_ilias_web.py | 83 ++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 PFERD/cli/command_kit_ilias_web.py diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py index 2a9f124..f9cb5d2 100644 --- a/PFERD/cli/__init__.py +++ b/PFERD/cli/__init__.py @@ -7,4 +7,5 @@ # well. from . import command_local # noqa: F401 imported but unused +from . import command_kit_ilias_web # noqa: F401 imported but unused from .parser import PARSER, load_default_section # noqa: F401 imported but unused diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py new file mode 100644 index 0000000..c743a51 --- /dev/null +++ b/PFERD/cli/command_kit_ilias_web.py @@ -0,0 +1,83 @@ +import argparse +import configparser +from pathlib import Path + +from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler + +SUBPARSER = SUBPARSERS.add_parser( + "kit-ilias-web", + parents=[CRAWLER_PARSER], +) + +GROUP = SUBPARSER.add_argument_group( + title="KIT ILIAS web-crawler arguments", + description="arguments for the 'kit-ilias-web' crawler", +) +GROUP.add_argument( + "target", + type=str, + metavar="TARGET", + help="course id, 'desktop', or ILIAS https-URL to crawl" +) +GROUP.add_argument( + "output", + type=Path, + metavar="OUTPUT", + help="output directory" +) +GROUP.add_argument( + "--videos", + # TODO Use argparse.BooleanOptionalAction after updating to 3.9 + action="store_const", + const=True, + help="crawl and download 
videos" +) +GROUP.add_argument( + "--username", + type=str, + metavar="USER_NAME", + help="user name for authentication" +) +GROUP.add_argument( + "--link-file-redirect-delay", + type=int, + metavar="SECONDS", + help="delay before external link files redirect you to their target (-1 to disable)" +) +GROUP.add_argument( + "--link-file-plaintext", + # TODO Use argparse.BooleanOptionalAction after updating to 3.9 + action="store_const", + const=True, + help="use plain text files for external links" +) + + +def load( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + parser["crawl:kit-ilias-web"] = {} + section = parser["crawl:kit-ilias-web"] + load_crawler(args, section) + + section["type"] = "kit-ilias-web" + section["target"] = str(args.target) + section["output_dir"] = str(args.output) + section["auth"] = "auth:kit-ilias-web" + if args.link_file_redirect_delay is not None: + section["link_file_redirect_delay"] = str(args.link_file_redirect_delay) + if args.link_file_plaintext is not None: + section["link_file_plaintext"] = str(args.link_file_plaintext) + if args.videos is not None: + section["videos"] = str(False) + + parser["auth:kit-ilias-web"] = {} + auth_section = parser["auth:kit-ilias-web"] + auth_section["type"] = "simple" + + if args.username is not None: + auth_section["username"] = str(args.username) + + +SUBPARSER.set_defaults(command=load) From 245c9c3dcc11d03395432efc831f2a42d2c28214 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 16:22:14 +0200 Subject: [PATCH 199/524] Explain output dir decisions and steps --- PFERD/output_dir.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 1f83de6..c81b598 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -174,9 +174,12 @@ class OutputDirectory: # since we know that the remote is different from the local files. This # includes the case where no local file exists. 
if not local_path.is_file(): + log.explain("No corresponding file present locally") # TODO Don't download if on_conflict is LOCAL_FIRST or NO_DELETE return True + log.explain(f"Redownload policy is {redownload.value}") + if redownload == Redownload.NEVER: return False elif redownload == Redownload.ALWAYS: @@ -187,6 +190,10 @@ class OutputDirectory: remote_newer = None if mtime := heuristics.mtime: remote_newer = mtime.timestamp() > stat.st_mtime + if remote_newer: + log.explain("Remote file seems to be newer") + else: + log.explain("Local file seems to be newer") if redownload == Redownload.NEVER_SMART: if remote_newer is None: @@ -332,19 +339,25 @@ class OutputDirectory: # Detect and solve local-dir-remote-file conflict if local_path.is_dir(): + log.explain("Conflict: There's a dir in place of the local file") if await self._conflict_ldrf(on_conflict, path): + log.explain("Result: Delete the dir") shutil.rmtree(local_path) else: + log.explain("Result: Keep the dir") return None # Detect and solve local-file-remote-dir conflict for parent in path.parents: local_parent = self.resolve(parent) if local_parent.exists() and not local_parent.is_dir(): + log.explain("Conflict: One of the local file's parents is a file") if await self._conflict_lfrd(on_conflict, path, parent): + log.explain("Result: Delete the obstructing file") local_parent.unlink() break else: + log.explain("Result: Keep the obstructing file") return None # Ensure parent directory exists @@ -366,9 +379,12 @@ class OutputDirectory: async def _after_download(self, info: DownloadInfo) -> None: with self._ensure_deleted(info.tmp_path): + log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}") + changed = False if not info.success: + log.explain("Download unsuccessful, aborting") return # Solve conflicts arising from existing local file @@ -376,13 +392,21 @@ class OutputDirectory: changed = True if filecmp.cmp(info.local_path, info.tmp_path): + log.explain("Contents identical with existing 
file") + log.explain("Updating metadata on existing file instead") self._update_metadata(info) return - if not await self._conflict_lfrf(info.on_conflict, info.path): + log.explain("Conflict: The local and remote versions differ") + if await self._conflict_lfrf(info.on_conflict, info.path): + log.explain("Result: Keeping the remote version") + else: + log.explain("Result: Keeping the local version") return + log.explain("Replacing local file with temporary file") info.tmp_path.replace(info.local_path) + log.explain("Updating metadata on local file") self._update_metadata(info) if changed: From b998339002ee93d69f6a1acb8372abcf9ab6c1e1 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 16:22:38 +0200 Subject: [PATCH 200/524] Fix cleanup logging of paths --- PFERD/output_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index c81b598..bf908f8 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -443,7 +443,7 @@ class OutputDirectory: if await self._conflict_delete_lf(self._on_conflict, pure): try: path.unlink() - log.action(f"[bold bright_magenta]Deleted[/] {escape(fmt_path(path))}") + log.action(f"[bold bright_magenta]Deleted[/] {escape(fmt_path(pure))}") self._report.delete_file(pure) except OSError: pass From c0cecf8363eb296dad5a04e2c2685d4f5a2080b2 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 16:22:58 +0200 Subject: [PATCH 201/524] Log crawl and download actions more extensively --- PFERD/crawler.py | 29 ++++++++++++++++------------- PFERD/output_dir.py | 8 +++++++- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index c1184c0..749510c 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -90,31 +90,38 @@ def anoncritical(f: AWrapped) -> AWrapped: class CrawlToken(ReusableAsyncContextManager[ProgressBar]): - def __init__(self, limiter: Limiter, desc: str): + def __init__(self, limiter: Limiter, path: PurePath): 
super().__init__() self._limiter = limiter - self._desc = desc + self._path = path async def _on_aenter(self) -> ProgressBar: + bar_desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(self._path))}" + after_desc = f"[bold bright_cyan]Crawled[/] {escape(fmt_path(self._path))}" + + self._stack.callback(lambda: log.action(after_desc)) await self._stack.enter_async_context(self._limiter.limit_crawl()) - bar = self._stack.enter_context(log.crawl_bar(self._desc)) + bar = self._stack.enter_context(log.crawl_bar(bar_desc)) return bar class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]): - def __init__(self, limiter: Limiter, fs_token: FileSinkToken, desc: str): + def __init__(self, limiter: Limiter, fs_token: FileSinkToken, path: PurePath): super().__init__() self._limiter = limiter self._fs_token = fs_token - self._desc = desc + self._path = path async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]: + bar_desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(self._path))}" + # The "Downloaded ..." 
message is printed in the output dir, not here + await self._stack.enter_async_context(self._limiter.limit_crawl()) sink = await self._stack.enter_async_context(self._fs_token) - bar = self._stack.enter_context(log.crawl_bar(self._desc)) + bar = self._stack.enter_context(log.crawl_bar(bar_desc)) return bar, sink @@ -229,9 +236,7 @@ class Crawler(ABC): return None log.explain("Answer: Yes") - - desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(path))}" - return CrawlToken(self._limiter, desc) + return CrawlToken(self._limiter, path) async def download( self, @@ -247,15 +252,13 @@ class Crawler(ABC): log.explain("Answer: No") return None - fs_token = await self._output_dir.download(transformed_path, mtime, redownload, on_conflict) + fs_token = await self._output_dir.download(path, transformed_path, mtime, redownload, on_conflict) if fs_token is None: log.explain("Answer: No") return None log.explain("Answer: Yes") - - desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(path))}" - return DownloadToken(self._limiter, fs_token, desc) + return DownloadToken(self._limiter, fs_token, path) async def _cleanup(self) -> None: log.explain_topic("Decision: Clean up files") diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index bf908f8..b07fe3e 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -78,6 +78,7 @@ class FileSink: @dataclass class DownloadInfo: + remote_path: PurePath path: PurePath local_path: Path tmp_path: Path @@ -96,6 +97,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]): def __init__( self, output_dir: "OutputDirectory", + remote_path: PurePath, path: PurePath, local_path: Path, heuristics: Heuristics, @@ -104,6 +106,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]): super().__init__() self._output_dir = output_dir + self._remote_path = remote_path self._path = path self._local_path = local_path self._heuristics = heuristics @@ -115,6 +118,7 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]): 
async def after_download() -> None: await self._output_dir._after_download(DownloadInfo( + self._remote_path, self._path, self._local_path, tmp_path, @@ -317,6 +321,7 @@ class OutputDirectory: async def download( self, + remote_path: PurePath, path: PurePath, mtime: Optional[datetime] = None, redownload: Optional[Redownload] = None, @@ -363,7 +368,7 @@ class OutputDirectory: # Ensure parent directory exists local_path.parent.mkdir(parents=True, exist_ok=True) - return FileSinkToken(self, path, local_path, heuristics, on_conflict) + return FileSinkToken(self, remote_path, path, local_path, heuristics, on_conflict) def _update_metadata(self, info: DownloadInfo) -> None: if mtime := info.heuristics.mtime: @@ -379,6 +384,7 @@ class OutputDirectory: async def _after_download(self, info: DownloadInfo) -> None: with self._ensure_deleted(info.tmp_path): + log.action(f"[bold bright_cyan]Downloaded[/] {fmt_path(info.remote_path)}") log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}") changed = False From 29d5a40c570ac21b7bd73fee64134e6c79216301 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 17:25:16 +0200 Subject: [PATCH 202/524] Replace asyncio.gather with custom Crawler function --- PFERD/crawler.py | 22 ++++++++++++++++++- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 5 ++--- PFERD/crawlers/local.py | 2 +- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index 749510c..e3aef8f 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -1,7 +1,8 @@ +import asyncio from abc import ABC, abstractmethod from datetime import datetime from pathlib import Path, PurePath -from typing import Any, Awaitable, Callable, Dict, Optional, Tuple, TypeVar +from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Tuple, TypeVar from rich.markup import escape @@ -228,6 +229,25 @@ class Crawler(ABC): section.on_conflict(), ) + @staticmethod + async def gather(awaitables: 
Sequence[Awaitable[Any]]) -> List[Any]: + """ + Similar to asyncio.gather. However, in the case of an exception, all + still running tasks are cancelled and the exception is rethrown. + + This should always be preferred over asyncio.gather in crawler code so + that an exception like CrawlError may actually stop the crawler. + """ + + tasks = [asyncio.ensure_future(aw) for aw in awaitables] + result = asyncio.gather(*tasks) + try: + return await result + except: # noqa: E722 + for task in tasks: + task.cancel() + raise + async def crawl(self, path: PurePath) -> Optional[CrawlToken]: log.explain_topic(f"Decision: Crawl {fmt_path(path)}") diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 9094a7b..597ea17 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -1,4 +1,3 @@ -import asyncio import re from pathlib import PurePath from typing import Any, Awaitable, Callable, Dict, Optional, Set, TypeVar, Union @@ -215,7 +214,7 @@ class KitIliasWebCrawler(HttpCrawler): # this method without having spawned a single task. Due to this we do # not need to cancel anything or worry about this gather call or the forks # further up. - await asyncio.gather(*tasks) + await self.gather(tasks) await impl() @@ -240,7 +239,7 @@ class KitIliasWebCrawler(HttpCrawler): # this method without having spawned a single task. Due to this we do # not need to cancel anything or worry about this gather call or the forks # further up. 
- await asyncio.gather(*tasks) + await self.gather(tasks) await impl() diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py index 176f36d..35e5829 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawlers/local.py @@ -83,7 +83,7 @@ class LocalCrawler(Crawler): pure_child = pure / child.name tasks.append(self._crawl_path(child, pure_child)) - await asyncio.gather(*tasks) + await self.gather(tasks) async def _crawl_file(self, path: Path, pure: PurePath) -> None: stat = path.stat() From 05ad06fbc11237582782ca68e587f55b683f2493 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 17:24:05 +0200 Subject: [PATCH 203/524] Only enclose get_page in iorepeat in ILIAS crawler We previously also gathered in there, which could lead to some more surprises when the method was retried. --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 597ea17..1019d3e 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -1,6 +1,6 @@ import re from pathlib import PurePath -from typing import Any, Awaitable, Callable, Dict, Optional, Set, TypeVar, Union +from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union import aiohttp from aiohttp import hdrs @@ -192,10 +192,11 @@ class KitIliasWebCrawler(HttpCrawler): return cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 - @_iorepeat(3, "crawling url") - async def impl() -> None: - tasks = [] + elements: List[IliasPageElement] = [] + @_iorepeat(3, "crawling url") + async def gather_elements() -> None: + elements.clear() async with cl: soup = await self._get_page(url) @@ -204,19 +205,16 @@ class KitIliasWebCrawler(HttpCrawler): if not perma_link_element or "crs_" not in perma_link_element.get("value"): raise 
CrawlError("Invalid course id? Didn't find anything looking like a course") - # Duplicated code, but the root page is special - we want to void fetching it twice! + # Duplicated code, but the root page is special - we want to avoid fetching it twice! page = IliasPage(soup, url, None) - for child in page.get_child_elements(): - tasks.append(self._handle_ilias_element(PurePath("."), child)) + elements.extend(page.get_child_elements()) - # The only point an I/O exception can be thrown is in `get_page`. - # If that happens, no task was spawned yet. Therefore, we only retry - # this method without having spawned a single task. Due to this we do - # not need to cancel anything or worry about this gather call or the forks - # further up. - await self.gather(tasks) + # Fill up our task list with the found elements + await gather_elements() + tasks = [self._handle_ilias_element(PurePath("."), element) for element in elements] - await impl() + # And execute them + await self.gather(tasks) async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: maybe_cl = await self.crawl(path) @@ -224,28 +222,27 @@ class KitIliasWebCrawler(HttpCrawler): return cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + elements: List[IliasPageElement] = [] + @_iorepeat(3, "crawling folder") - async def impl() -> None: - tasks = [] + async def gather_elements() -> None: + elements.clear() async with cl: soup = await self._get_page(url) page = IliasPage(soup, url, parent) - for child in page.get_child_elements(): - tasks.append(self._handle_ilias_element(path, child)) + elements.extend(page.get_child_elements()) - # The only point an I/O exception can be thrown is in `get_page`. - # If that happens, no task was spawned yet. Therefore, we only retry - # this method without having spawned a single task. Due to this we do - # not need to cancel anything or worry about this gather call or the forks - # further up. 
- await self.gather(tasks) + # Fill up our task list with the found elements + await gather_elements() + tasks = [self._handle_ilias_element(PurePath("."), element) for element in elements] - await impl() + # And execute them + await self.gather(tasks) @anoncritical - # Shouldn't happen but this method must never raise an I/O error as that might interfere with - # handle_ilias_page or crawl_url + # Shouldn't happen but we also really don't want to let I/O errors bubble up to anoncritical. + # If that happens we will be terminated as anoncritical doesn't tream them as non-critical. @_wrap_io_in_warning("handling ilias element") async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: element_path = PurePath(parent_path, element.name) From 463f8830d736e7b510c282f125eef65533d4b804 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 18:12:34 +0200 Subject: [PATCH 204/524] Add warn_contd --- PFERD/logging.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/PFERD/logging.py b/PFERD/logging.py index ef6ee4c..e97a3fa 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -130,6 +130,13 @@ class Log: self.print(f"[bold bright_red]Warning[/] {escape(text)}") + def warn_contd(self, text: str) -> None: + """ + Print further lines of a warning message. Allows no markup. + """ + + self.print(f"{escape(text)}") + def error(self, text: str) -> None: """ Print an error message. Allows no markup. 
From 59f13bb8d6f2616705fb5d0e3db7e80a46560b61 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 18:12:51 +0200 Subject: [PATCH 205/524] Explain ILIAS HTML parsing and add some warnings --- PFERD/crawlers/ilias/kit_ilias_html.py | 58 +++++++++++-------- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 2 + 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_html.py b/PFERD/crawlers/ilias/kit_ilias_html.py index eed0884..cc02801 100644 --- a/PFERD/crawlers/ilias/kit_ilias_html.py +++ b/PFERD/crawlers/ilias/kit_ilias_html.py @@ -8,6 +8,7 @@ from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup, Tag +from PFERD.logging import log from PFERD.utils import url_set_query_params TargetType = Union[str, int] @@ -48,11 +49,15 @@ class IliasPage: Return all child page elements you can find here. """ if self._is_video_player(): + log.explain("Page is a video player, extracting URL") return self._player_to_video() if self._is_video_listing(): + log.explain("Page is a video listing, finding elements") return self._find_video_entries() if self._is_exercise_file(): + log.explain("Page is an exercise, finding elements") return self._find_exercise_entries() + log.explain("Page is a normal folder, finding elements") return self._find_normal_entries() def _is_video_player(self) -> bool: @@ -96,7 +101,7 @@ class IliasPage: json_match = regex.search(str(self._soup)) if json_match is None: - print(f"Could not find json stream info for {self._page_url!r}") + log.warn("Could not find JSON stream info in video player. 
Ignoring video.") return [] json_str = json_match.group(1) @@ -125,6 +130,7 @@ class IliasPage: url: str = self._abs_url_from_link(content_link) query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} url = url_set_query_params(url, query_params) + log.explain("Found ILIAS redirection page, following it as a new entry") return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None @@ -139,20 +145,12 @@ class IliasPage: table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) if table_element is None: - # TODO: Properly log this - print( - "Could not increase elements per page (table not found)." - " Some might not be crawled!" - ) + log.warn("Couldn't increase elements per page (table not found). I might miss elements.") return self._find_video_entries_no_paging() id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"]) if id_match is None: - # TODO: Properly log this - print( - "Could not increase elements per page (table id not found)." - " Some might not be crawled!" - ) + log.warn("Couldn't increase elements per page (table id not found). 
I might miss elements.") return self._find_video_entries_no_paging() table_id = id_match.group(1) @@ -160,6 +158,8 @@ class IliasPage: query_params = {f"tbl_xoct_{table_id}_trows": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} url = url_set_query_params(self._page_url, query_params) + + log.explain("Disabled pagination, retrying folder as a new entry") return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")] def _find_video_entries_no_paging(self) -> List[IliasPageElement]: @@ -173,7 +173,6 @@ class IliasPage: results: List[IliasPageElement] = [] - # TODO: Sadly the download button is currently broken, so never do that for link in video_links: results.append(self._listed_video_to_element(link)) @@ -194,6 +193,7 @@ class IliasPage: video_url = self._abs_url_from_link(link) + log.explain(f"Found video {video_name!r} at {video_url!r}") return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) def _find_exercise_entries(self) -> List[IliasPageElement]: @@ -213,6 +213,8 @@ class IliasPage: text="Download" ) + log.explain(f"Found exercise container {container_name!r}") + # Grab each file as you now have the link for file_link in files: # Two divs, side by side. 
Left is the name, right is the link ==> get left @@ -221,6 +223,7 @@ class IliasPage: file_name = _sanitize_path_name(file_name) url = self._abs_url_from_link(file_link) + log.explain(f"Found exercise entry {file_name!r}") results.append(IliasPageElement( IliasElementType.FILE, url, @@ -245,11 +248,14 @@ class IliasPage: if not element_type: continue if element_type == IliasElementType.MEETING: - element_name = _sanitize_path_name(self._normalize_meeting_name(element_name)) + normalized = _sanitize_path_name(self._normalize_meeting_name(element_name)) + log.explain(f"Normalized meeting name from {element_name!r} to {normalized!r}") + element_name = normalized elif element_type == IliasElementType.FILE: result.append(self._file_to_element(element_name, abs_url, link)) continue + log.explain(f"Found {element_name!r}") result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) return result @@ -282,8 +288,8 @@ class IliasPage: ) if modification_date_match is None: modification_date = None - # TODO: Properly log this - print(f"Could not extract start date from {all_properties_text!r}") + # TODO: Figure out if this is expected or *always* an error. + log.explain(f"Element {name} at {url} has no date. Properties: {all_properties_text!r}") else: modification_date_str = modification_date_match.group(1) modification_date = demangle_date(modification_date_str) @@ -292,6 +298,7 @@ class IliasPage: name = _sanitize_path_name(link_element.getText()) full_path = name + "." 
+ file_type + log.explain(f"Found file {full_path!r}") return IliasPageElement(IliasElementType.FILE, url, full_path, modification_date) @staticmethod @@ -318,8 +325,10 @@ class IliasPage: if "ref_id=" in parsed_url.query: return IliasPage._find_type_from_folder_like(link_element, url) - # TODO: Log this properly - print(f"Unknown type: The element was at {str(element_name)!r} and it is {link_element!r})") + _unexpected_html_warning() + log.warn_contd( + f"Tried to figure out element type, but failed for {str(element_name)!r} / {link_element!r})" + ) return None @staticmethod @@ -339,16 +348,16 @@ class IliasPage: break if found_parent is None: - # TODO: Log this properly - print(f"Could not find element icon for {url!r}") + _unexpected_html_warning() + log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url!r}") return None # Find the small descriptive icon to figure out the type img_tag: Optional[Tag] = found_parent.select_one("img.ilListItemIcon") if img_tag is None: - # TODO: Log this properly - print(f"Could not find image tag for {url!r}") + _unexpected_html_warning() + log.warn_contd(f"Tried to figure out element type, but did not find an image for {url!r}") return None if "opencast" in str(img_tag["alt"]).lower(): @@ -393,6 +402,10 @@ class IliasPage: return urljoin(self._page_url, link_tag.get("href")) +def _unexpected_html_warning() -> None: + log.warn("Encountered unexpected HTML structure, ignoring element.") + + german_months = ['Jan', 'Feb', 'Mär', 'Apr', 'Mai', 'Jun', 'Jul', 'Aug', 'Sep', 'Okt', 'Nov', 'Dez'] english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] @@ -429,8 +442,7 @@ def demangle_date(date_str: str) -> Optional[datetime]: return datetime(year, month, day, hour, minute) except Exception: - # TODO: Properly log this - print(f"Could not parse date {date_str!r}") + log.warn(f"Date parsing failed for {date_str!r}") return None diff --git 
a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 1019d3e..1a4a529 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -229,6 +229,8 @@ class KitIliasWebCrawler(HttpCrawler): elements.clear() async with cl: soup = await self._get_page(url) + log.explain_topic(f"Parsing HTML page for {path!r}") + log.explain(f"URL: {url!r}") page = IliasPage(soup, url, parent) elements.extend(page.get_child_elements()) From a9af56a5e9b5752cf7ba1180ac36c5cdf6605316 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 18:16:25 +0200 Subject: [PATCH 206/524] Improve specifying crawlers via CLI Instead of removing the sections of unselected crawlers from the config file, crawler selection now happens in the Pferd after loading the crawlers and is more sophisticated. It also has better error messages. --- PFERD/__main__.py | 24 +++-------------- PFERD/pferd.py | 68 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 64 insertions(+), 28 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 2578487..9bc2974 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -6,7 +6,7 @@ from pathlib import Path from .cli import PARSER, load_default_section from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError from .logging import log -from .pferd import Pferd +from .pferd import Pferd, PferdLoadError from .transformer import RuleParseError from .version import NAME, VERSION @@ -24,28 +24,10 @@ def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser: args.command(args, parser) load_default_section(args, parser) - prune_crawlers(args, parser) return parser -def prune_crawlers( - args: argparse.Namespace, - parser: configparser.ConfigParser, -) -> None: - if not args.crawler: - return - - for section in parser.sections(): - if section.startswith("crawl:"): - # TODO Use removeprefix() when switching to 3.9 
- name = section[len("crawl:"):] - if name not in args.crawler: - parser.remove_section(section) - - # TODO Check if crawlers actually exist - - def load_config(args: argparse.Namespace) -> Config: try: return Config(load_config_parser(args)) @@ -119,9 +101,9 @@ def main() -> None: exit() try: - pferd = Pferd(config) + pferd = Pferd(config, args.crawler) asyncio.run(pferd.run()) - except ConfigOptionError as e: + except (PferdLoadError, ConfigOptionError) as e: log.unlock() log.error(str(e)) exit(1) diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 4aee043..75b0e9d 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, List, Optional from rich.markup import escape @@ -10,13 +10,22 @@ from .crawlers import CRAWLERS from .logging import log +class PferdLoadError(Exception): + pass + + class Pferd: - def __init__(self, config: Config): + def __init__(self, config: Config, crawlers_to_run: Optional[List[str]]): """ - May throw ConfigOptionError. + May throw PferdLoadError. 
""" + if crawlers_to_run is not None and len(crawlers_to_run) != len(set(crawlers_to_run)): + raise PferdLoadError("Some crawlers were selected multiple times") + self._config = config + self._crawlers_to_run = crawlers_to_run + self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} @@ -31,9 +40,13 @@ class Pferd: authenticator = authenticator_constructor(name, section, self._config) self._authenticators[name] = authenticator - def _load_crawlers(self) -> None: + def _load_crawlers(self) -> List[str]: + names = [] + for name, section in self._config.crawler_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") + names.append(name) + crawl_type = section.get("type") crawler_constructor = CRAWLERS.get(crawl_type) if crawler_constructor is None: @@ -42,15 +55,56 @@ class Pferd: crawler = crawler_constructor(name, section, self._config, self._authenticators) self._crawlers[name] = crawler + return names + + def _find_crawlers_to_run(self, loaded_crawlers: List[str]) -> List[str]: + log.explain_topic("Deciding which crawlers to run") + + if self._crawlers_to_run is None: + log.explain("No crawlers specified on CLI") + log.explain("Running all loaded crawlers") + return loaded_crawlers + + log.explain("Crawlers specified on CLI") + + names: List[str] = [] # With 'crawl:' prefix + unknown_names = [] # Without 'crawl:' prefix + + for name in self._crawlers_to_run: + section_name = f"crawl:{name}" + if section_name in self._crawlers: + log.explain(f"Found crawler section named {section_name!r}") + names.append(section_name) + else: + log.explain(f"There's no crawler section named {section_name!r}") + unknown_names.append(name) + + if unknown_names: + if len(unknown_names) == 1: + [name] = unknown_names + raise PferdLoadError(f"There is no crawler named {name!r}") + else: + names_str = ", ".join(repr(name) for name in unknown_names) + raise PferdLoadError(f"There are no crawlers named {names_str}") + + return names + async 
def run(self) -> None: + """ + May throw PferdLoadError or ConfigOptionError. + """ + # These two functions must run inside the same event loop as the # crawlers, so that any new objects (like Conditions or Futures) can # obtain the correct event loop. self._load_authenticators() - self._load_crawlers() + loaded_crawlers = self._load_crawlers() + + log.print("") + + for name in self._find_crawlers_to_run(loaded_crawlers): + crawler = self._crawlers[name] - for name, crawler in self._crawlers.items(): - log.print("") log.print(f"[bold bright_cyan]Running[/] {escape(name)}") try: From 79efdb56f7b5e1e3638bda4896cdb6d9df73a690 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 18:22:29 +0200 Subject: [PATCH 207/524] Adjust ILIAS html explain messages --- PFERD/crawlers/ilias/kit_ilias_html.py | 6 +++--- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_html.py b/PFERD/crawlers/ilias/kit_ilias_html.py index cc02801..636fa68 100644 --- a/PFERD/crawlers/ilias/kit_ilias_html.py +++ b/PFERD/crawlers/ilias/kit_ilias_html.py @@ -52,12 +52,12 @@ class IliasPage: log.explain("Page is a video player, extracting URL") return self._player_to_video() if self._is_video_listing(): - log.explain("Page is a video listing, finding elements") + log.explain("Page is a video listing, searching for elements") return self._find_video_entries() if self._is_exercise_file(): - log.explain("Page is an exercise, finding elements") + log.explain("Page is an exercise, searching for elements") return self._find_exercise_entries() - log.explain("Page is a normal folder, finding elements") + log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() def _is_video_player(self) -> bool: diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 1a4a529..41633d2 100644 --- 
a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -206,6 +206,8 @@ class KitIliasWebCrawler(HttpCrawler): raise CrawlError("Invalid course id? Didn't find anything looking like a course") # Duplicated code, but the root page is special - we want to avoid fetching it twice! + log.explain_topic("Parsing root HTML page") + log.explain(f"URL: {url}") page = IliasPage(soup, url, None) elements.extend(page.get_child_elements()) @@ -229,8 +231,8 @@ class KitIliasWebCrawler(HttpCrawler): elements.clear() async with cl: soup = await self._get_page(url) - log.explain_topic(f"Parsing HTML page for {path!r}") - log.explain(f"URL: {url!r}") + log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") + log.explain(f"URL: {url}") page = IliasPage(soup, url, parent) elements.extend(page.get_child_elements()) From d97d6bf147903b245b61f17254674ac6b53d7061 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 18:29:28 +0200 Subject: [PATCH 208/524] Fix handling nested ILIAS folders --- PFERD/crawlers/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py index 41633d2..7e1562c 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawlers/ilias/kit_ilias_web_crawler.py @@ -239,7 +239,7 @@ class KitIliasWebCrawler(HttpCrawler): # Fill up our task list with the found elements await gather_elements() - tasks = [self._handle_ilias_element(PurePath("."), element) for element in elements] + tasks = [self._handle_ilias_element(path, element) for element in elements] # And execute them await self.gather(tasks) From 445dffc987e7dd62913bf8e941c01464a07f7577 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 18:35:32 +0200 Subject: [PATCH 209/524] Reword some explanations --- PFERD/output_dir.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) 
diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index b07fe3e..b850a03 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -197,7 +197,7 @@ class OutputDirectory: if remote_newer: log.explain("Remote file seems to be newer") else: - log.explain("Local file seems to be newer") + log.explain("Remote file doesn't seem to be newer") if redownload == Redownload.NEVER_SMART: if remote_newer is None: @@ -344,12 +344,12 @@ class OutputDirectory: # Detect and solve local-dir-remote-file conflict if local_path.is_dir(): - log.explain("Conflict: There's a dir in place of the local file") + log.explain("Conflict: There's a directory in place of the local file") if await self._conflict_ldrf(on_conflict, path): - log.explain("Result: Delete the dir") + log.explain("Result: Delete the obstructing directory") shutil.rmtree(local_path) else: - log.explain("Result: Keep the dir") + log.explain("Result: Keep the obstructing directory") return None # Detect and solve local-file-remote-dir conflict @@ -399,20 +399,19 @@ class OutputDirectory: if filecmp.cmp(info.local_path, info.tmp_path): log.explain("Contents identical with existing file") - log.explain("Updating metadata on existing file instead") + log.explain("Updating metadata of existing file") self._update_metadata(info) return log.explain("Conflict: The local and remote versions differ") if await self._conflict_lfrf(info.on_conflict, info.path): - log.explain("Result: Keeping the remote version") + log.explain("Result: Replacing local with remote version") else: - log.explain("Result: Keeping the local version") + log.explain("Result: Keeping local version") return - log.explain("Replacing local file with temporary file") info.tmp_path.replace(info.local_path) - log.explain("Updating metadata on local file") + log.explain("Updating file metadata") self._update_metadata(info) if changed: From 74c7b39dc85b4d2693503cd33d2021aea86b0137 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 18:39:25 +0200 
Subject: [PATCH 210/524] Clean up files in alphabetical order --- PFERD/output_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index b850a03..8d1c6b1 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -431,7 +431,7 @@ class OutputDirectory: await self._cleanup_file(path, pure) async def _cleanup_dir(self, path: Path, pure: PurePath, delete_self: bool = True) -> None: - for child in path.iterdir(): + for child in sorted(path.iterdir()): pure_child = pure / child.name await self._cleanup(child, pure_child) From e4e5e83be62c92ebbade8c8b7ea077c171f21e7f Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 18:39:43 +0200 Subject: [PATCH 211/524] Fix downloader using crawl bar Looks like I made a dumb copy-paste error. Now the download bar shows the proper progress and speed again. --- PFERD/crawler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/crawler.py b/PFERD/crawler.py index e3aef8f..e73ce72 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawler.py @@ -120,9 +120,9 @@ class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]): bar_desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(self._path))}" # The "Downloaded ..." 
message is printed in the output dir, not here - await self._stack.enter_async_context(self._limiter.limit_crawl()) + await self._stack.enter_async_context(self._limiter.limit_download()) sink = await self._stack.enter_async_context(self._fs_token) - bar = self._stack.enter_context(log.crawl_bar(bar_desc)) + bar = self._stack.enter_context(log.download_bar(bar_desc)) return bar, sink From 5edd868d5bbd44b7c8acc3e31db1adfc5a91dae8 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 18:49:34 +0200 Subject: [PATCH 212/524] Fix always-smart redownloading the wrong files --- PFERD/output_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 8d1c6b1..06cfe6b 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -208,7 +208,7 @@ class OutputDirectory: if remote_newer is None: return True else: - return not remote_newer + return remote_newer # This should never be reached raise ValueError(f"{redownload!r} is not a valid redownload policy") From 37f8d84a9c1fcadeee684f08ae8d3036f5a82213 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 19:00:01 +0200 Subject: [PATCH 213/524] Output total amount of http requests in HTTP Crawler --- PFERD/http_crawler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/PFERD/http_crawler.py b/PFERD/http_crawler.py index adbac5d..c6e679d 100644 --- a/PFERD/http_crawler.py +++ b/PFERD/http_crawler.py @@ -27,6 +27,7 @@ class HttpCrawler(Crawler): self._authentication_id = 0 self._authentication_lock = asyncio.Lock() self._current_cookie_jar: Optional[aiohttp.CookieJar] = None + self._request_count = 0 async def _current_auth_id(self) -> int: """ @@ -41,6 +42,7 @@ class HttpCrawler(Crawler): # This should reduce the amount of requests we make: If an authentication is in progress # all future requests wait for authentication to complete. 
async with self._authentication_lock: + self._request_count += 1 return self._authentication_id async def authenticate(self, caller_auth_id: int) -> None: @@ -85,6 +87,7 @@ class HttpCrawler(Crawler): async def run(self) -> None: self._current_cookie_jar = aiohttp.CookieJar() + self._request_count = 0 try: self._current_cookie_jar.load(self._cookie_jar_path) @@ -100,6 +103,7 @@ class HttpCrawler(Crawler): await super().run() finally: del self.session + log.explain_topic(f"Total amount of HTTP requests: {self._request_count}") # They are saved in authenticate, but a final save won't hurt await self._save_cookies() From bbf9f8f130f8087604548e1716fc94bef22dc5a8 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 19:05:56 +0200 Subject: [PATCH 214/524] Add -C as alias for --crawler --- PFERD/cli/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index 71d9732..bd62b6e 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -88,7 +88,7 @@ PARSER.add_argument( " Uses default config file path if no path is specified" ) PARSER.add_argument( - "--crawler", + "--crawler", "-C", action="append", type=str, metavar="NAME", From 2fdf24495b1655feb7a2e6a2ef349d19e3442ef3 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 19:16:42 +0200 Subject: [PATCH 215/524] Restructure crawling and auth related modules --- PFERD/{authenticators => auth}/__init__.py | 2 +- PFERD/{ => auth}/authenticator.py | 2 +- PFERD/{authenticators => auth}/simple.py | 2 +- PFERD/{authenticators => auth}/tfa.py | 2 +- PFERD/{crawlers => crawl}/__init__.py | 6 +++--- PFERD/{ => crawl}/crawler.py | 16 ++++++++-------- PFERD/{ => crawl}/http_crawler.py | 8 ++++---- PFERD/{crawlers => crawl}/ilias/__init__.py | 0 .../{crawlers => crawl}/ilias/file_templates.py | 0 .../{crawlers => crawl}/ilias/kit_ilias_html.py | 0 .../ilias/kit_ilias_web_crawler.py | 16 +++++++--------- .../local.py => crawl/local_crawler.py} | 2 +- 
PFERD/pferd.py | 6 ++---- 13 files changed, 29 insertions(+), 33 deletions(-) rename PFERD/{authenticators => auth}/__init__.py (91%) rename PFERD/{ => auth}/authenticator.py (98%) rename PFERD/{authenticators => auth}/simple.py (96%) rename PFERD/{authenticators => auth}/tfa.py (93%) rename PFERD/{crawlers => crawl}/__init__.py (82%) rename PFERD/{ => crawl}/crawler.py (96%) rename PFERD/{ => crawl}/http_crawler.py (97%) rename PFERD/{crawlers => crawl}/ilias/__init__.py (100%) rename PFERD/{crawlers => crawl}/ilias/file_templates.py (100%) rename PFERD/{crawlers => crawl}/ilias/kit_ilias_html.py (100%) rename PFERD/{crawlers => crawl}/ilias/kit_ilias_web_crawler.py (98%) rename PFERD/{crawlers/local.py => crawl/local_crawler.py} (98%) diff --git a/PFERD/authenticators/__init__.py b/PFERD/auth/__init__.py similarity index 91% rename from PFERD/authenticators/__init__.py rename to PFERD/auth/__init__.py index 35096cf..6247e2b 100644 --- a/PFERD/authenticators/__init__.py +++ b/PFERD/auth/__init__.py @@ -1,8 +1,8 @@ from configparser import SectionProxy from typing import Callable, Dict -from ..authenticator import Authenticator, AuthSection from ..config import Config +from .authenticator import Authenticator, AuthSection from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator diff --git a/PFERD/authenticator.py b/PFERD/auth/authenticator.py similarity index 98% rename from PFERD/authenticator.py rename to PFERD/auth/authenticator.py index d67b263..9217dcd 100644 --- a/PFERD/authenticator.py +++ b/PFERD/auth/authenticator.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Tuple -from .config import Config, Section +from ..config import Config, Section class AuthLoadException(Exception): diff --git a/PFERD/authenticators/simple.py b/PFERD/auth/simple.py similarity index 96% rename from PFERD/authenticators/simple.py rename to PFERD/auth/simple.py index bcbe69c..a12c359 100644 --- 
a/PFERD/authenticators/simple.py +++ b/PFERD/auth/simple.py @@ -1,9 +1,9 @@ from typing import Optional, Tuple -from ..authenticator import Authenticator, AuthException, AuthSection from ..config import Config from ..logging import log from ..utils import agetpass, ainput +from .authenticator import Authenticator, AuthException, AuthSection class SimpleAuthSection(AuthSection): diff --git a/PFERD/authenticators/tfa.py b/PFERD/auth/tfa.py similarity index 93% rename from PFERD/authenticators/tfa.py rename to PFERD/auth/tfa.py index b0eef18..670626d 100644 --- a/PFERD/authenticators/tfa.py +++ b/PFERD/auth/tfa.py @@ -1,9 +1,9 @@ from typing import Tuple -from ..authenticator import Authenticator, AuthException, AuthSection from ..config import Config from ..logging import log from ..utils import ainput +from .authenticator import Authenticator, AuthException, AuthSection class TfaAuthenticator(Authenticator): diff --git a/PFERD/crawlers/__init__.py b/PFERD/crawl/__init__.py similarity index 82% rename from PFERD/crawlers/__init__.py rename to PFERD/crawl/__init__.py index dc7dfa0..297c490 100644 --- a/PFERD/crawlers/__init__.py +++ b/PFERD/crawl/__init__.py @@ -1,11 +1,11 @@ from configparser import SectionProxy from typing import Callable, Dict -from ..authenticator import Authenticator +from ..auth import Authenticator from ..config import Config -from ..crawler import Crawler +from .crawler import Crawler, CrawlError # noqa: F401 from .ilias import KitIliasWebCrawler, KitIliasWebCrawlerSection -from .local import LocalCrawler, LocalCrawlerSection +from .local_crawler import LocalCrawler, LocalCrawlerSection CrawlerConstructor = Callable[[ str, # Name (without the "crawl:" prefix) diff --git a/PFERD/crawler.py b/PFERD/crawl/crawler.py similarity index 96% rename from PFERD/crawler.py rename to PFERD/crawl/crawler.py index e73ce72..a79e968 100644 --- a/PFERD/crawler.py +++ b/PFERD/crawl/crawler.py @@ -6,14 +6,14 @@ from typing import Any, Awaitable, Callable, Dict, 
List, Optional, Sequence, Tup from rich.markup import escape -from .authenticator import Authenticator -from .config import Config, Section -from .limiter import Limiter -from .logging import ProgressBar, log -from .output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload -from .report import MarkConflictError, MarkDuplicateError -from .transformer import Transformer -from .utils import ReusableAsyncContextManager, fmt_path +from ..auth import Authenticator +from ..config import Config, Section +from ..limiter import Limiter +from ..logging import ProgressBar, log +from ..output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload +from ..report import MarkConflictError, MarkDuplicateError +from ..transformer import Transformer +from ..utils import ReusableAsyncContextManager, fmt_path class CrawlWarning(Exception): diff --git a/PFERD/http_crawler.py b/PFERD/crawl/http_crawler.py similarity index 97% rename from PFERD/http_crawler.py rename to PFERD/crawl/http_crawler.py index c6e679d..e82dfed 100644 --- a/PFERD/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -4,11 +4,11 @@ from typing import Optional import aiohttp -from .config import Config +from ..config import Config +from ..logging import log +from ..utils import fmt_real_path +from ..version import NAME, VERSION from .crawler import Crawler, CrawlerSection -from .logging import log -from .utils import fmt_real_path -from .version import NAME, VERSION class HttpCrawler(Crawler): diff --git a/PFERD/crawlers/ilias/__init__.py b/PFERD/crawl/ilias/__init__.py similarity index 100% rename from PFERD/crawlers/ilias/__init__.py rename to PFERD/crawl/ilias/__init__.py diff --git a/PFERD/crawlers/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py similarity index 100% rename from PFERD/crawlers/ilias/file_templates.py rename to PFERD/crawl/ilias/file_templates.py diff --git a/PFERD/crawlers/ilias/kit_ilias_html.py 
b/PFERD/crawl/ilias/kit_ilias_html.py similarity index 100% rename from PFERD/crawlers/ilias/kit_ilias_html.py rename to PFERD/crawl/ilias/kit_ilias_html.py diff --git a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py similarity index 98% rename from PFERD/crawlers/ilias/kit_ilias_web_crawler.py rename to PFERD/crawl/ilias/kit_ilias_web_crawler.py index 7e1562c..33356ed 100644 --- a/PFERD/crawlers/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -6,15 +6,13 @@ import aiohttp from aiohttp import hdrs from bs4 import BeautifulSoup, Tag -from PFERD.authenticators import Authenticator -from PFERD.config import Config -from PFERD.crawler import CrawlError, CrawlerSection, CrawlWarning, anoncritical -from PFERD.http_crawler import HttpCrawler -from PFERD.logging import ProgressBar, log -from PFERD.output_dir import FileSink, Redownload -from PFERD.utils import soupify, url_set_query_param - -from ...utils import fmt_path +from ...auth import Authenticator +from ...config import Config +from ...logging import ProgressBar, log +from ...output_dir import FileSink, Redownload +from ...utils import fmt_path, soupify, url_set_query_param +from ..crawler import CrawlError, CrawlerSection, CrawlWarning, anoncritical +from ..http_crawler import HttpCrawler from .file_templates import link_template_plain, link_template_rich from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement diff --git a/PFERD/crawlers/local.py b/PFERD/crawl/local_crawler.py similarity index 98% rename from PFERD/crawlers/local.py rename to PFERD/crawl/local_crawler.py index 35e5829..7958169 100644 --- a/PFERD/crawlers/local.py +++ b/PFERD/crawl/local_crawler.py @@ -5,7 +5,7 @@ from pathlib import Path, PurePath from typing import Optional from ..config import Config -from ..crawler import Crawler, CrawlerSection, anoncritical +from .crawler import Crawler, CrawlerSection, anoncritical class 
LocalCrawlerSection(CrawlerSection): diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 75b0e9d..5b5b866 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -2,11 +2,9 @@ from typing import Dict, List, Optional from rich.markup import escape -from .authenticator import Authenticator -from .authenticators import AUTHENTICATORS +from .auth import AUTHENTICATORS, Authenticator from .config import Config, ConfigOptionError -from .crawler import Crawler, CrawlError -from .crawlers import CRAWLERS +from .crawl import CRAWLERS, Crawler, CrawlError from .logging import log From 6e9f8fd39107ce2ca0a11b5dd9f08b9d7dfa7cf2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 19:44:12 +0200 Subject: [PATCH 216/524] Add a keyring authenticator --- CONFIG.md | 9 +++++ PFERD/auth/__init__.py | 3 ++ PFERD/auth/keyring_authenticator.py | 56 +++++++++++++++++++++++++++++ setup.cfg | 1 + 4 files changed, 69 insertions(+) create mode 100644 PFERD/auth/keyring_authenticator.py diff --git a/CONFIG.md b/CONFIG.md index e92858f..bd3baca 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -154,6 +154,15 @@ This authenticator prompts the user on the console for a two-factor authentication token. The token is provided as password and it is not cached. This authenticator does not support usernames. +### The `keyring` authenticator + +This authenticator uses the system keyring to store passwords. It expects a +username in the config and will prompt *once* for the password. After that it +receives the password from the system keyring. + +- `username`: The username. (Required) +- `keyring_name`: The service name PFERD uses for storing credentials. (Optional) + ## Transformation rules Transformation rules are rules for renaming and excluding files and directories. 
diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 6247e2b..81ec31d 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -3,6 +3,7 @@ from typing import Callable, Dict from ..config import Config from .authenticator import Authenticator, AuthSection +from .keyring_authenticator import KeyringAuthenticator, KeyringAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator @@ -17,4 +18,6 @@ AUTHENTICATORS: Dict[str, AuthConstructor] = { SimpleAuthenticator(n, SimpleAuthSection(s), c), "tfa": lambda n, s, c: TfaAuthenticator(n, AuthSection(s), c), + "keyring": lambda n, s, c: + KeyringAuthenticator(n, KeyringAuthSection(s), c) } diff --git a/PFERD/auth/keyring_authenticator.py b/PFERD/auth/keyring_authenticator.py new file mode 100644 index 0000000..413c7ad --- /dev/null +++ b/PFERD/auth/keyring_authenticator.py @@ -0,0 +1,56 @@ +from typing import Optional, Tuple + +import keyring + +from ..config import Config +from ..logging import log +from ..utils import agetpass +from ..version import NAME +from .authenticator import Authenticator, AuthException, AuthSection + + +class KeyringAuthSection(AuthSection): + def username(self) -> str: + name = self.s.get("username") + if name is None: + self.missing_value("username") + return name + + def keyring_name(self) -> str: + return self.s.get("keyring_name", fallback=NAME) + + +class KeyringAuthenticator(Authenticator): + + def __init__( + self, + name: str, + section: KeyringAuthSection, + config: Config, + ) -> None: + super().__init__(name, section, config) + + self._username = section.username() + self._password: Optional[str] = None + self._keyring_name = section.keyring_name() + + async def credentials(self) -> Tuple[str, str]: + if self._password is not None: + return self._username, self._password + + password = keyring.get_password(self._keyring_name, self._username) + + if not password: + async with log.exclusive_output(): + password = 
await agetpass("Password: ") + keyring.set_password(self._keyring_name, self._username, password) + + self._password = password + + return self._username, password + + def invalidate_credentials(self) -> None: + self.invalidate_password() + + def invalidate_password(self) -> None: + raise AuthException("Invalid password") diff --git a/setup.cfg b/setup.cfg index 431c3b9..1cbfc6a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,6 +9,7 @@ install_requires = aiohttp>=3.7.4.post0 beautifulsoup4>=4.9.3 rich>=10.1.0 + keyring>=23.0.1 [options.entry_points] console_scripts = From 6ca0ecdf05b85c1986de50724443aaabb5755506 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 20:46:12 +0200 Subject: [PATCH 217/524] Load and store reports --- PFERD/crawl/crawler.py | 12 +++++++- PFERD/output_dir.py | 35 ++++++++++++++++++++- PFERD/report.py | 69 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 110 insertions(+), 6 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index a79e968..60ea708 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -11,7 +11,7 @@ from ..config import Config, Section from ..limiter import Limiter from ..logging import ProgressBar, log from ..output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload -from ..report import MarkConflictError, MarkDuplicateError +from ..report import MarkConflictError, MarkDuplicateError, Report from ..transformer import Transformer from ..utils import ReusableAsyncContextManager, fmt_path @@ -229,6 +229,14 @@ class Crawler(ABC): section.on_conflict(), ) + @property + def report(self) -> Report: + return self._output_dir.report + + @property + def prev_report(self) -> Optional[Report]: + return self._output_dir.prev_report + @staticmethod async def gather(awaitables: Sequence[Awaitable[Any]]) -> List[Any]: """ @@ -298,8 +306,10 @@ class Crawler(ABC): with log.show_progress(): self._output_dir.prepare() + 
self._output_dir.load_prev_report() await self._run() await self._cleanup() + self._output_dir.store_report() @abstractmethod async def _run(self) -> None: diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 06cfe6b..d60a312 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -1,4 +1,5 @@ import filecmp +import json import os import random import shutil @@ -13,7 +14,7 @@ from typing import BinaryIO, Iterator, Optional, Tuple from rich.markup import escape from .logging import log -from .report import Report +from .report import Report, ReportLoadError from .utils import ReusableAsyncContextManager, fmt_path, fmt_real_path, prompt_yes_no SUFFIX_CHARS = string.ascii_lowercase + string.digits @@ -134,6 +135,8 @@ class FileSinkToken(ReusableAsyncContextManager[FileSink]): class OutputDirectory: + REPORT_FILE = PurePath(".report") + def __init__( self, root: Path, @@ -144,7 +147,19 @@ class OutputDirectory: self._redownload = redownload self._on_conflict = on_conflict + self._report_path = self.resolve(self.REPORT_FILE) self._report = Report() + self._prev_report: Optional[Report] = None + + self.register_reserved(self.REPORT_FILE) + + @property + def report(self) -> Report: + return self._report + + @property + def prev_report(self) -> Optional[Report]: + return self._prev_report def prepare(self) -> None: log.explain_topic(f"Creating base directory at {fmt_real_path(self._root)}") @@ -452,3 +467,21 @@ class OutputDirectory: self._report.delete_file(pure) except OSError: pass + + def load_prev_report(self) -> None: + log.explain_topic(f"Loading previous report from {fmt_real_path(self._report_path)}") + try: + self._prev_report = Report.load(self._report_path) + log.explain("Loaded report successfully") + except (OSError, json.JSONDecodeError, ReportLoadError) as e: + log.explain("Failed to load report") + log.explain(str(e)) + + def store_report(self) -> None: + log.explain_topic(f"Storing report to {fmt_real_path(self._report_path)}") + try: + 
self._report.store(self._report_path) + log.explain("Stored report successfully") + except OSError as e: + log.warn(f"Failed to save report to {fmt_real_path(self._report_path)}") + log.warn_contd(str(e)) diff --git a/PFERD/report.py b/PFERD/report.py index 7d8aa85..619c621 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -1,5 +1,10 @@ -from pathlib import PurePath -from typing import Set +import json +from pathlib import Path, PurePath +from typing import Any, Dict, List, Set + + +class ReportLoadError(Exception): + pass class MarkDuplicateError(Exception): @@ -48,10 +53,66 @@ class Report: self.reserved_files: Set[PurePath] = set() self.known_files: Set[PurePath] = set() - self.new_files: Set[PurePath] = set() + self.added_files: Set[PurePath] = set() self.changed_files: Set[PurePath] = set() self.deleted_files: Set[PurePath] = set() + @staticmethod + def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]: + result: Any = data.get(key, []) + + if not isinstance(result, list): + raise ReportLoadError(f"Incorrect format: {key!r} is not a list") + + for elem in result: + if not isinstance(elem, str): + raise ReportLoadError(f"Incorrect format: {key!r} must contain only strings") + + return result + + @classmethod + def load(cls, path: Path) -> "Report": + """ + May raise OSError, JsonDecodeError, ReportLoadError. 
+ """ + + with open(path) as f: + data = json.load(f) + + if not isinstance(data, dict): + raise ReportLoadError("Incorrect format: Root is not an object") + + self = cls() + for elem in self._get_list_of_strs(data, "reserved"): + self.mark_reserved(PurePath(elem)) + for elem in self._get_list_of_strs(data, "known"): + self.mark(PurePath(elem)) + for elem in self._get_list_of_strs(data, "added"): + self.add_file(PurePath(elem)) + for elem in self._get_list_of_strs(data, "changed"): + self.change_file(PurePath(elem)) + for elem in self._get_list_of_strs(data, "deleted"): + self.delete_file(PurePath(elem)) + + return self + + def store(self, path: Path) -> None: + """ + May raise OSError. + """ + + data = { + "reserved": [str(path) for path in sorted(self.reserved_files)], + "known": [str(path) for path in sorted(self.known_files)], + "added": [str(path) for path in sorted(self.added_files)], + "changed": [str(path) for path in sorted(self.changed_files)], + "deleted": [str(path) for path in sorted(self.deleted_files)], + } + + with open(path, "w") as f: + json.dump(data, f, indent=2, sort_keys=True) + f.write("\n") # json.dump doesn't do this + def mark_reserved(self, path: PurePath) -> None: self.reserved_files.add(path) @@ -84,7 +145,7 @@ class Report: Unlike mark(), this function accepts any paths. 
""" - self.new_files.add(path) + self.added_files.add(path) def change_file(self, path: PurePath) -> None: """ From 9cce78669fc3fe418dde29b643804c7e38755af7 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 21:13:06 +0200 Subject: [PATCH 218/524] Print report after all crawlers have finished --- PFERD/pferd.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 5b5b866..2b9921e 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -6,6 +6,7 @@ from .auth import AUTHENTICATORS, Authenticator from .config import Config, ConfigOptionError from .crawl import CRAWLERS, Crawler, CrawlError from .logging import log +from .utils import fmt_path class PferdLoadError(Exception): @@ -97,10 +98,11 @@ class Pferd: # obtain the correct event loop. self._load_authenticators() loaded_crawlers = self._load_crawlers() + names = self._find_crawlers_to_run(loaded_crawlers) log.print("") - for name in self._find_crawlers_to_run(loaded_crawlers): + for name in names: crawler = self._crawlers[name] log.print(f"[bold bright_cyan]Running[/] {escape(name)}") @@ -111,3 +113,23 @@ class Pferd: log.error(str(e)) except Exception: log.unexpected_exception() + + for name in names: + crawler = self._crawlers[name] + + log.report("") + log.report(f"[bold bright_cyan]Report[/] for {escape(name)}") + + something_happened = False + for path in sorted(crawler.report.added_files): + something_happened = True + log.report(f" [bold bright_green]Added[/] {fmt_path(path)}") + for path in sorted(crawler.report.changed_files): + something_happened = True + log.report(f" [bold bright_yellow]Changed[/] {fmt_path(path)}") + for path in sorted(crawler.report.deleted_files): + something_happened = True + log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}") + + if not something_happened: + log.report(" Nothing happened") From ce1dbda5b4144884db48aa9041db32a162b56e1c Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 
May 2021 21:27:37 +0200 Subject: [PATCH 219/524] Overhaul colours "Crawled" and "Downloaded" are now printed less bright than "Crawling" and "Downloading" as they're not as important. Explain topics are printed in yellow to stand out a bit more from the cyan action messages. --- PFERD/crawl/crawler.py | 2 +- PFERD/logging.py | 2 +- PFERD/output_dir.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index 60ea708..cefefad 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -99,7 +99,7 @@ class CrawlToken(ReusableAsyncContextManager[ProgressBar]): async def _on_aenter(self) -> ProgressBar: bar_desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(self._path))}" - after_desc = f"[bold bright_cyan]Crawled[/] {escape(fmt_path(self._path))}" + after_desc = f"[bold cyan]Crawled[/] {escape(fmt_path(self._path))}" self._stack.callback(lambda: log.action(after_desc)) await self._stack.enter_async_context(self._limiter.limit_crawl()) diff --git a/PFERD/logging.py b/PFERD/logging.py index e97a3fa..fc13e13 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -181,7 +181,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new """ if self.output_explain: - self.print(f"[cyan]{escape(text)}") + self.print(f"[yellow]{escape(text)}") def explain(self, text: str) -> None: """ diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index d60a312..cd68ead 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -399,7 +399,7 @@ class OutputDirectory: async def _after_download(self, info: DownloadInfo) -> None: with self._ensure_deleted(info.tmp_path): - log.action(f"[bold bright_cyan]Downloaded[/] {fmt_path(info.remote_path)}") + log.action(f"[bold cyan]Downloaded[/] {fmt_path(info.remote_path)}") log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}") changed = False From 0ca06801654acaeb3c99440ce7073c9fff7395c1 Mon Sep 17 00:00:00 2001 From: Joscha 
Date: Sun, 23 May 2021 21:40:48 +0200 Subject: [PATCH 220/524] Simplify --version --- PFERD/__main__.py | 5 ----- PFERD/cli/parser.py | 5 +++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 9bc2974..8cb6a11 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -8,7 +8,6 @@ from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError from .logging import log from .pferd import Pferd, PferdLoadError from .transformer import RuleParseError -from .version import NAME, VERSION def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser: @@ -81,10 +80,6 @@ def dump_config(args: argparse.Namespace, config: Config) -> None: def main() -> None: args = PARSER.parse_args() - if args.version: - print(f"{NAME} {VERSION}") - exit() - # Configuring logging happens in two stages because CLI args have # precedence over config file options and loading the config already # produces some kinds of log messages (usually only explain()-s). 
diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index bd62b6e..af5c340 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -3,6 +3,7 @@ import configparser from pathlib import Path from ..output_dir import OnConflict, Redownload +from ..version import NAME, VERSION CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( @@ -70,8 +71,8 @@ PARSER = argparse.ArgumentParser() PARSER.set_defaults(command=None) PARSER.add_argument( "--version", - action="store_true", - help="print version and exit" + action="version", + version=f"{NAME} {VERSION}", ) PARSER.add_argument( "--config", "-c", From 1f400d59643dc8010b2828e9c6e11d29be673423 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 22:26:41 +0200 Subject: [PATCH 221/524] Implement BooleanOptionalAction --- PFERD/cli/parser.py | 58 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index af5c340..66c5959 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -1,10 +1,62 @@ import argparse import configparser from pathlib import Path +from typing import Any, List, Optional, Sequence, Union from ..output_dir import OnConflict, Redownload from ..version import NAME, VERSION + +# TODO Replace with argparse version when updating to 3.9? 
+class BooleanOptionalAction(argparse.Action): + def __init__( + self, + option_strings: List[str], + dest: Any, + default: Any = None, + type: Any = None, + choices: Any = None, + required: Any = False, + help: Any = None, + metavar: Any = None, + ): + if len(option_strings) != 1: + raise ValueError("There must be exactly one option string") + [self.name] = option_strings + if not self.name.startswith("--"): + raise ValueError(f"{self.name!r} doesn't start with '--'") + if self.name.startswith("--no-"): + raise ValueError(f"{self.name!r} starts with '--no-'") + + options = [self.name, "--no-" + self.name[2:]] + + super().__init__( + options, + dest, + nargs=0, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar, + ) + + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: Union[str, Sequence[Any], None], + option_string: Optional[str] = None, + ) -> None: + if option_string and option_string in self.option_strings: + value = not option_string.startswith("--no-") + setattr(namespace, self.dest, value) + + def format_usage(self) -> str: + return "--[no-]" + self.name[2:] + + CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( title="general crawler arguments", @@ -103,10 +155,8 @@ PARSER.add_argument( help="custom working directory" ) PARSER.add_argument( - "--explain", "-e", - # TODO Use argparse.BooleanOptionalAction after updating to 3.9 - action="store_const", - const=True, + "--explain", + action=BooleanOptionalAction, help="log and explain in detail what PFERD is doing" ) From 27b5a8e4905746c365ea8d7fa076804a4440410b Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 22:39:07 +0200 Subject: [PATCH 222/524] Rename log.action to log.status --- PFERD/crawl/crawler.py | 2 +- PFERD/logging.py | 6 +++--- PFERD/output_dir.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index cefefad..d6d4abc 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -101,7 +101,7 @@ class CrawlToken(ReusableAsyncContextManager[ProgressBar]): bar_desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(self._path))}" after_desc = f"[bold cyan]Crawled[/] {escape(fmt_path(self._path))}" - self._stack.callback(lambda: log.action(after_desc)) + self._stack.callback(lambda: log.status(after_desc)) await self._stack.enter_async_context(self._limiter.limit_crawl()) bar = self._stack.enter_context(log.crawl_bar(bar_desc)) diff --git a/PFERD/logging.py b/PFERD/logging.py index fc13e13..5025d88 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -55,7 +55,7 @@ class Log: # Whether different parts of the output are enabled or disabled self.output_explain = False - self.output_action = True + self.output_status = True self.output_report = True def _update_live(self) -> None: @@ -191,12 +191,12 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_explain: self.print(f" {escape(text)}") - def action(self, text: str) -> None: + def status(self, text: str) -> None: """ Print a status update while crawling. Allows markup. 
""" - if self.output_action: + if self.output_status: self.print(text) def report(self, text: str) -> None: diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index cd68ead..17cb772 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -399,7 +399,7 @@ class OutputDirectory: async def _after_download(self, info: DownloadInfo) -> None: with self._ensure_deleted(info.tmp_path): - log.action(f"[bold cyan]Downloaded[/] {fmt_path(info.remote_path)}") + log.status(f"[bold cyan]Downloaded[/] {fmt_path(info.remote_path)}") log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}") changed = False @@ -430,10 +430,10 @@ class OutputDirectory: self._update_metadata(info) if changed: - log.action(f"[bold bright_yellow]Changed[/] {escape(fmt_path(info.path))}") + log.status(f"[bold bright_yellow]Changed[/] {escape(fmt_path(info.path))}") self._report.change_file(info.path) else: - log.action(f"[bold bright_green]Added[/] {escape(fmt_path(info.path))}") + log.status(f"[bold bright_green]Added[/] {escape(fmt_path(info.path))}") self._report.add_file(info.path) async def cleanup(self) -> None: @@ -463,7 +463,7 @@ class OutputDirectory: if await self._conflict_delete_lf(self._on_conflict, pure): try: path.unlink() - log.action(f"[bold bright_magenta]Deleted[/] {escape(fmt_path(pure))}") + log.status(f"[bold bright_magenta]Deleted[/] {escape(fmt_path(pure))}") self._report.delete_file(pure) except OSError: pass From edbd92dbbf5a12a04e7d675b2df3cc1c1eba89ca Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 22:41:59 +0200 Subject: [PATCH 223/524] Add --status and --report flags --- PFERD/__main__.py | 10 ++++++++++ PFERD/cli/parser.py | 10 ++++++++++ PFERD/config.py | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 8cb6a11..0fbce59 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -39,11 +39,17 @@ def load_config(args: argparse.Namespace) -> Config: def configure_logging_from_args(args: 
argparse.Namespace) -> None: if args.explain is not None: log.output_explain = args.explain + if args.status is not None: + log.output_status = args.status + if args.report is not None: + log.output_report = args.report # We want to prevent any unnecessary output if we're printing the config to # stdout, otherwise it would not be a valid config file. if args.dump_config == "-": log.output_explain = False + log.output_status = False + log.output_report = False def configure_logging_from_config(args: argparse.Namespace, config: Config) -> None: @@ -56,6 +62,10 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N try: if args.explain is None: log.output_explain = config.default_section.explain() + if args.status is None: + log.output_status = config.default_section.status() + if args.report is None: + log.output_report = config.default_section.report() except ConfigOptionError as e: log.error(str(e)) exit(1) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index 66c5959..346070f 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -159,6 +159,16 @@ PARSER.add_argument( action=BooleanOptionalAction, help="log and explain in detail what PFERD is doing" ) +PARSER.add_argument( + "--status", + action=BooleanOptionalAction, + help="print status updates while PFERD is crawling" +) +PARSER.add_argument( + "--report", + action=BooleanOptionalAction, + help="print a report of all local changes before exiting" +) def load_default_section( diff --git a/PFERD/config.py b/PFERD/config.py index 3c69fc7..0c99683 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -75,6 +75,12 @@ class DefaultSection(Section): def explain(self) -> bool: return self.s.getboolean("explain", fallback=False) + def status(self) -> bool: + return self.s.getboolean("status", fallback=True) + + def report(self) -> bool: + return self.s.getboolean("report", fallback=True) + class Config: @staticmethod From 79be6e1dc541c660b0f39eb23b337baf718ddf06 Mon Sep 17 
00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 22:49:09 +0200 Subject: [PATCH 224/524] Switch some other options to BooleanOptionalAction --- PFERD/cli/command_kit_ilias_web.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index c743a51..e98f192 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -2,7 +2,7 @@ import argparse import configparser from pathlib import Path -from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler +from .parser import CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, load_crawler SUBPARSER = SUBPARSERS.add_parser( "kit-ilias-web", @@ -27,9 +27,7 @@ GROUP.add_argument( ) GROUP.add_argument( "--videos", - # TODO Use argparse.BooleanOptionalAction after updating to 3.9 - action="store_const", - const=True, + action=BooleanOptionalAction, help="crawl and download videos" ) GROUP.add_argument( @@ -46,9 +44,7 @@ GROUP.add_argument( ) GROUP.add_argument( "--link-file-plaintext", - # TODO Use argparse.BooleanOptionalAction after updating to 3.9 - action="store_const", - const=True, + action=BooleanOptionalAction, help="use plain text files for external links" ) From be4b1040f8a66292408154e05425cf17b1da8003 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 23 May 2021 22:51:42 +0200 Subject: [PATCH 225/524] Document status and report options --- CONFIG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CONFIG.md b/CONFIG.md index bd3baca..b976b7d 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -21,6 +21,10 @@ default values for the other sections. expanded to the current user's home directory. (Default: `.`) - `explain`: Whether PFERD should log and explain its actions and decisions in detail. (Default: `no`) +- `status`: Whether PFERD should print status updates while crawling. 
(Default: + `yes`) +- `report`: Whether PFERD should print a report of added, changed and deleted + local files for all crawlers before exiting. (Default: `yes`) ## The `crawl:*` sections From 8dd06894207789f763374a128584af13a82dee49 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 23:04:18 +0200 Subject: [PATCH 226/524] Add keyring authentication to ILIAS CLI --- PFERD/cli/command_kit_ilias_web.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index e98f192..e47bc77 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -36,6 +36,11 @@ GROUP.add_argument( metavar="USER_NAME", help="user name for authentication" ) +GROUP.add_argument( + "--keyring", + action=BooleanOptionalAction, + help="use the system keyring to store and retrieve passwords" +) GROUP.add_argument( "--link-file-redirect-delay", type=int, @@ -70,7 +75,11 @@ def load( parser["auth:kit-ilias-web"] = {} auth_section = parser["auth:kit-ilias-web"] - auth_section["type"] = "simple" + + if args.keyring: + auth_section["type"] = "keyring" + else: + auth_section["type"] = "simple" if args.username is not None: auth_section["username"] = str(args.username) From 3ab3581f849ae5ee223c434752dfeffdf30884a9 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 23 May 2021 23:40:28 +0200 Subject: [PATCH 227/524] Add timeout for HTTP connection --- CONFIG.md | 4 +++- PFERD/cli/command_kit_ilias_web.py | 8 ++++++++ PFERD/crawl/http_crawler.py | 10 +++++++++- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 9 ++++++--- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index b976b7d..dcc7421 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -140,7 +140,9 @@ This crawler crawls the KIT ILIAS instance. 
It performs remote calls to a poor S - `link_file_plain_text`: If this is set to true, PFERD will generate plain-text files containing only the link target for external links. If this is false or not specified, PFERD will generate a neat, pretty and functional HTML page instead. -- `no-videos`: If this is set to true, PFERD will not crawl or download any videos. +- `videos`: If this is set to false, PFERD will not crawl or download any videos. +- `http_timeout`: The timeout for http requests + ## Authenticator types ### The `simple` authenticator diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index e47bc77..89da390 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -52,6 +52,12 @@ GROUP.add_argument( action=BooleanOptionalAction, help="use plain text files for external links" ) +GROUP.add_argument( + "--http-timeout", + type=float, + metavar="SECONDS", + help="the timeout to use for HTTP requests" +) def load( @@ -72,6 +78,8 @@ def load( section["link_file_plaintext"] = str(args.link_file_plaintext) if args.videos is not None: section["videos"] = str(False) + if args.http_timeout is not None: + section["http_timeout"] = str(args.http_timeout) parser["auth:kit-ilias-web"] = {} auth_section = parser["auth:kit-ilias-web"] diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index e82dfed..8cd6afe 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -3,6 +3,7 @@ from pathlib import PurePath from typing import Optional import aiohttp +from aiohttp.client import ClientTimeout from ..config import Config from ..logging import log @@ -11,13 +12,18 @@ from ..version import NAME, VERSION from .crawler import Crawler, CrawlerSection +class HttpCrawlerSection(CrawlerSection): + def http_timeout(self) -> float: + return self.s.getfloat("http_timeout", fallback=20) + + class HttpCrawler(Crawler): COOKIE_FILE = PurePath(".cookies") def __init__( self, name: 
str, - section: CrawlerSection, + section: HttpCrawlerSection, config: Config, ) -> None: super().__init__(name, section, config) @@ -28,6 +34,7 @@ class HttpCrawler(Crawler): self._authentication_lock = asyncio.Lock() self._current_cookie_jar: Optional[aiohttp.CookieJar] = None self._request_count = 0 + self._http_timeout = section.http_timeout() async def _current_auth_id(self) -> int: """ @@ -97,6 +104,7 @@ class HttpCrawler(Crawler): async with aiohttp.ClientSession( headers={"User-Agent": f"{NAME}/{VERSION}"}, cookie_jar=self._current_cookie_jar, + timeout=ClientTimeout(total=self._http_timeout) ) as session: self.session = session try: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 33356ed..445997f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -1,3 +1,4 @@ +import asyncio import re from pathlib import PurePath from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union @@ -11,15 +12,15 @@ from ...config import Config from ...logging import ProgressBar, log from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param -from ..crawler import CrawlError, CrawlerSection, CrawlWarning, anoncritical -from ..http_crawler import HttpCrawler +from ..crawler import CrawlError, CrawlWarning, anoncritical +from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import link_template_plain, link_template_rich from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement TargetType = Union[str, int] -class KitIliasWebCrawlerSection(CrawlerSection): +class KitIliasWebCrawlerSection(HttpCrawlerSection): def target(self) -> TargetType: target = self.s.get("target") @@ -92,6 +93,8 @@ def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: last_exception = e except aiohttp.ClientConnectionError as e: # e.g. 
timeout, disconnect, resolve failed, etc. last_exception = e + except asyncio.exceptions.TimeoutError as e: # explicit http timeouts in HttpCrawler + last_exception = e log.explain_topic(f"Retrying operation {name}. Retries left: {attempts - 1 - round}") if last_exception: From fca62541ca56408addecdbcab57c9f958c772f69 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 May 2021 00:24:31 +0200 Subject: [PATCH 228/524] De-duplicate element names in ILIAS crawler This prevents any conflicts caused by multiple files with the same name. Conflicts may still arise due to transforms, but that is out of our control and a user error. --- PFERD/crawl/ilias/kit_ilias_html.py | 52 ++++++++++++++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 +- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 636fa68..61df57a 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -461,3 +461,55 @@ def _tomorrow() -> date: def _sanitize_path_name(name: str) -> str: return name.replace("/", "-").replace("\\", "-").strip() + + +def deduplicate_element_names(elements: List[IliasPageElement]) -> List[IliasPageElement]: + """ + De-duplicates element names by appending an incrementing number to later elements: + test.pdf + test.pdf + would result in + test.pdf + test_1.pdf + + It is also space-aware: + "te st.pdf" + "te st.pdf" + would result in + "tes st.pdf" + "tes st 1.pdf" + """ + known_names = dict() + result_elements = [] + + for element in elements: + # This file is new - add it and mark its name as used + if element.name not in known_names: + known_names[element.name] = 1 + result_elements.append(element) + continue + + # This file is a duplicate. 
Find a suitable suffix + current_counter = known_names[element.name] + adjusted_element = _append_number(element, current_counter) + # increment the counter so the next duplicate does not conflict + known_names[element.name] += 1 + # also block the new name, so another file with the *renamed* name gets renamed as well + known_names[adjusted_element.name] = 1 + + result_elements.append(adjusted_element) + + return result_elements + + +def _append_number(element: IliasPageElement, number: int) -> IliasPageElement: + extension_index = element.name.rfind(".") + suffix = f" {number}" if " " in element.name else f"_{number}" + if extension_index < 0: + new_name = element.name + suffix + else: + new_name = element.name[:extension_index] + suffix + element.name[extension_index:] + + return IliasPageElement( + element.type, element.url, new_name, element.mtime, element.description + ) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 445997f..222e1d6 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -15,7 +15,7 @@ from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import CrawlError, CrawlWarning, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import link_template_plain, link_template_rich -from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement +from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement, deduplicate_element_names TargetType = Union[str, int] @@ -214,6 +214,7 @@ class KitIliasWebCrawler(HttpCrawler): # Fill up our task list with the found elements await gather_elements() + elements = deduplicate_element_names(elements) tasks = [self._handle_ilias_element(PurePath("."), element) for element in elements] # And execute them @@ -240,6 +241,7 @@ class KitIliasWebCrawler(HttpCrawler): # Fill up our task list with the found elements await 
gather_elements() + elements = deduplicate_element_names(elements) tasks = [self._handle_ilias_element(path, element) for element in elements] # And execute them From c687d4a51a27bb2121293282a4640c91c5a4ac14 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 24 May 2021 13:10:19 +0200 Subject: [PATCH 229/524] Implement cookie sharing --- CONFIG.md | 5 ++ PFERD/cli/parser.py | 9 ++- PFERD/config.py | 3 + PFERD/crawl/http_crawler.py | 80 +++++++++++++++++----- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 9 ++- PFERD/pferd.py | 10 ++- 6 files changed, 95 insertions(+), 21 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index dcc7421..7e8a717 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -25,6 +25,11 @@ default values for the other sections. `yes`) - `report`: Whether PFERD should print a report of added, changed and deleted local files for all crawlers before exiting. (Default: `yes`) +- `share_cookies`: Whether crawlers should share cookies where applicable. By + default, crawlers are isolated and don't interact with each other. This + includes their cookies. However, in situations where multiple crawlers crawl + the same website using the same account, sharing cookies between crawlers can + make sense. 
(Default: `yes`) ## The `crawl:*` sections diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index 346070f..72abb76 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -169,6 +169,11 @@ PARSER.add_argument( action=BooleanOptionalAction, help="print a report of all local changes before exiting" ) +PARSER.add_argument( + "--share-cookies", + action=BooleanOptionalAction, + help="whether crawlers should share cookies where applicable" +) def load_default_section( @@ -180,7 +185,9 @@ def load_default_section( if args.working_dir is not None: section["working_dir"] = str(args.working_dir) if args.explain is not None: - section["explain"] = "true" if args.explain else "false" + section["explain"] = "yes" if args.explain else "no" + if args.share_cookies is not None: + section["share_cookies"] = "yes" if args.share_cookies else "no" SUBPARSERS = PARSER.add_subparsers(title="crawlers") diff --git a/PFERD/config.py b/PFERD/config.py index 0c99683..abd6e9e 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -81,6 +81,9 @@ class DefaultSection(Section): def report(self) -> bool: return self.s.getboolean("report", fallback=True) + def share_cookies(self) -> bool: + return self.s.getboolean("share_cookies", fallback=True) + class Config: @staticmethod diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index 8cd6afe..facc2ba 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -1,10 +1,11 @@ import asyncio -from pathlib import PurePath -from typing import Optional +from pathlib import Path, PurePath +from typing import Dict, List, Optional import aiohttp from aiohttp.client import ClientTimeout +from ..auth import Authenticator from ..config import Config from ..logging import log from ..utils import fmt_real_path @@ -25,17 +26,22 @@ class HttpCrawler(Crawler): name: str, section: HttpCrawlerSection, config: Config, + shared_auth: Optional[Authenticator] = None, ) -> None: super().__init__(name, section, config) - 
self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE) - self._output_dir.register_reserved(self.COOKIE_FILE) self._authentication_id = 0 self._authentication_lock = asyncio.Lock() - self._current_cookie_jar: Optional[aiohttp.CookieJar] = None self._request_count = 0 self._http_timeout = section.http_timeout() + self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE) + self._shared_cookie_jar_paths: Optional[List[Path]] = None + self._shared_auth = shared_auth + self._current_cookie_jar: Optional[aiohttp.CookieJar] = None + + self._output_dir.register_reserved(self.COOKIE_FILE) + async def _current_auth_id(self) -> int: """ Returns the id for the current authentication, i.e. an identifier for the last @@ -71,7 +77,7 @@ class HttpCrawler(Crawler): self._authentication_id += 1 # Saving the cookies after the first auth ensures we won't need to re-authenticate # on the next run, should this one be aborted or crash - await self._save_cookies() + self._save_cookies() async def _authenticate(self) -> None: """ @@ -80,26 +86,68 @@ class HttpCrawler(Crawler): """ raise RuntimeError("_authenticate() was called but crawler doesn't provide an implementation") - async def _save_cookies(self) -> None: + def share_cookies(self, shared: Dict[Authenticator, List[Path]]) -> None: + if not self._shared_auth: + return + + if self._shared_auth in shared: + self._shared_cookie_jar_paths = shared[self._shared_auth] + else: + self._shared_cookie_jar_paths = [] + shared[self._shared_auth] = self._shared_cookie_jar_paths + + self._shared_cookie_jar_paths.append(self._cookie_jar_path) + + def _load_cookies(self) -> None: + log.explain_topic("Loading cookies") + cookie_jar_path: Optional[Path] = None + + if self._shared_cookie_jar_paths is None: + log.explain("Not sharing any cookies") + cookie_jar_path = self._cookie_jar_path + else: + log.explain("Sharing cookies") + max_mtime: Optional[float] = None + for path in self._shared_cookie_jar_paths: + if not 
path.is_file(): + log.explain(f"{fmt_real_path(path)} is not a file") + continue + mtime = path.stat().st_mtime + if max_mtime is None or mtime > max_mtime: + log.explain(f"{fmt_real_path(path)} has newest mtime so far") + max_mtime = mtime + cookie_jar_path = path + else: + log.explain(f"{fmt_real_path(path)} has older mtime") + + if cookie_jar_path is None: + log.explain("Couldn't find a suitable cookie file") + return + + log.explain(f"Loading cookies from {fmt_real_path(cookie_jar_path)}") + try: + self._current_cookie_jar = aiohttp.CookieJar() + self._current_cookie_jar.load(cookie_jar_path) + except Exception as e: + log.explain("Failed to load cookies") + log.explain(str(e)) + + def _save_cookies(self) -> None: log.explain_topic("Saving cookies") if not self._current_cookie_jar: log.explain("No cookie jar, save aborted") return try: + log.explain(f"Saving cookies to {fmt_real_path(self._cookie_jar_path)}") self._current_cookie_jar.save(self._cookie_jar_path) - log.explain(f"Cookies saved to {fmt_real_path(self._cookie_jar_path)}") - except Exception: + except Exception as e: log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}") + log.warn(str(e)) async def run(self) -> None: - self._current_cookie_jar = aiohttp.CookieJar() self._request_count = 0 - - try: - self._current_cookie_jar.load(self._cookie_jar_path) - except Exception: - pass + self._load_cookies() async with aiohttp.ClientSession( headers={"User-Agent": f"{NAME}/{VERSION}"}, @@ -114,4 +162,4 @@ class HttpCrawler(Crawler): log.explain_topic(f"Total amount of HTTP requests: {self._request_count}") # They are saved in authenticate, but a final save won't hurt - await self._save_cookies() + self._save_cookies() diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 222e1d6..d488974 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -152,12 +152,15 @@ class 
KitIliasWebCrawler(HttpCrawler): config: Config, authenticators: Dict[str, Authenticator] ): - super().__init__(name, section, config) + # Setting a main authenticator for cookie sharing + auth = section.auth(authenticators) + super().__init__(name, section, config, shared_auth=auth) self._shibboleth_login = KitShibbolethLogin( - section.auth(authenticators), - section.tfa_auth(authenticators) + auth, + section.tfa_auth(authenticators), ) + self._base_url = "https://ilias.studium.kit.edu" self._target = section.target() diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 2b9921e..35f5194 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -1,10 +1,11 @@ +from pathlib import Path from typing import Dict, List, Optional from rich.markup import escape from .auth import AUTHENTICATORS, Authenticator from .config import Config, ConfigOptionError -from .crawl import CRAWLERS, Crawler, CrawlError +from .crawl import CRAWLERS, Crawler, CrawlError, KitIliasWebCrawler from .logging import log from .utils import fmt_path @@ -42,6 +43,9 @@ class Pferd: def _load_crawlers(self) -> List[str]: names = [] + # Cookie sharing + kit_ilias_web_paths: Dict[Authenticator, List[Path]] = {} + for name, section in self._config.crawler_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") names.append(name) @@ -54,6 +58,10 @@ class Pferd: crawler = crawler_constructor(name, section, self._config, self._authenticators) self._crawlers[name] = crawler + if self._config.default_section.share_cookies(): + if isinstance(crawler, KitIliasWebCrawler): + crawler.share_cookies(kit_ilias_web_paths) + return names def _find_crawlers_to_run(self, loaded_crawlers: List[str]) -> List[str]: From 1c1f781be421bfdab29ee942b6ab1892dd948186 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 24 May 2021 13:17:28 +0200 Subject: [PATCH 230/524] Reword some log messages --- PFERD/config.py | 2 +- PFERD/pferd.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/PFERD/config.py b/PFERD/config.py index abd6e9e..8293331 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -110,7 +110,7 @@ class Config: """ if path: - log.explain("Using custom path") + log.explain("Path specified on CLI") else: log.explain("Using default path") path = Config._default_path() diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 35f5194..bed7c66 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -80,7 +80,7 @@ class Pferd: for name in self._crawlers_to_run: section_name = f"crawl:{name}" if section_name in self._crawlers: - log.explain(f"Found crawler section named {section_name!r}") + log.explain(f"Crawler section named {section_name!r} exists") names.append(section_name) else: log.explain(f"There's no crawler section named {section_name!r}") @@ -128,16 +128,16 @@ class Pferd: log.report("") log.report(f"[bold bright_cyan]Report[/] for {escape(name)}") - something_happened = False + something_changed = False for path in sorted(crawler.report.added_files): - something_happened = True + something_changed = True log.report(f" [bold bright_green]Added[/] {fmt_path(path)}") for path in sorted(crawler.report.changed_files): - something_happened = True + something_changed = True log.report(f" [bold bright_yellow]Changed[/] {fmt_path(path)}") for path in sorted(crawler.report.deleted_files): - something_happened = True + something_changed = True log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}") - if not something_happened: - log.report(" Nothing happened") + if not something_changed: + log.report(" Nothing changed") From 5c761930458dc1a181e9014d3e0856d68bf5dc6f Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 24 May 2021 15:21:25 +0200 Subject: [PATCH 231/524] Set up pyinstaller --- .gitignore | 5 ++ pferd.py | 4 ++ scripts/build | 5 ++ scripts/check | 2 + scripts/format | 2 + scripts/setup | 3 + sync_url.py | 160 ------------------------------------------------- 7 files changed, 21 insertions(+), 160 deletions(-) create mode 100644 
pferd.py create mode 100755 scripts/build delete mode 100755 sync_url.py diff --git a/.gitignore b/.gitignore index 2928b54..455eaca 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,8 @@ /PFERD.egg-info/ __pycache__/ /.vscode/ + +# pyinstaller +/pferd.spec +/build/ +/dist/ diff --git a/pferd.py b/pferd.py new file mode 100644 index 0000000..21857f4 --- /dev/null +++ b/pferd.py @@ -0,0 +1,4 @@ +from PFERD.__main__ import main + +if __name__ == "__main__": + main() diff --git a/scripts/build b/scripts/build new file mode 100755 index 0000000..6f88655 --- /dev/null +++ b/scripts/build @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -e + +pyinstaller --onefile pferd.py diff --git a/scripts/check b/scripts/check index ba767cd..2283951 100755 --- a/scripts/check +++ b/scripts/check @@ -1,4 +1,6 @@ #!/usr/bin/env bash +set -e + mypy PFERD flake8 PFERD diff --git a/scripts/format b/scripts/format index cc196ae..d8917ef 100755 --- a/scripts/format +++ b/scripts/format @@ -1,4 +1,6 @@ #!/usr/bin/env bash +set -e + autopep8 --recursive --in-place PFERD isort PFERD diff --git a/scripts/setup b/scripts/setup index 8a5399b..46fe2f8 100755 --- a/scripts/setup +++ b/scripts/setup @@ -1,5 +1,8 @@ #!/usr/bin/env bash +set -e + pip install --upgrade pip setuptools pip install --editable . pip install --upgrade mypy flake8 autopep8 isort +pip install --upgrade pyinstaller diff --git a/sync_url.py b/sync_url.py deleted file mode 100755 index ca78de0..0000000 --- a/sync_url.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python - -""" -A simple script to download a course by name from ILIAS. 
-""" - -import argparse -import logging -import sys -from pathlib import Path, PurePath -from typing import Optional -from urllib.parse import urlparse - -from PFERD import Pferd -from PFERD.authenticators import KeyringAuthenticator, UserPassAuthenticator -from PFERD.cookie_jar import CookieJar -from PFERD.ilias import (IliasCrawler, IliasElementType, - KitShibbolethAuthenticator) -from PFERD.logging import PrettyLogger, enable_logging -from PFERD.organizer import (ConflictType, FileConflictResolution, - FileConflictResolver, resolve_prompt_user) -from PFERD.transform import sanitize_windows_path -from PFERD.utils import to_path - -_LOGGER = logging.getLogger("sync_url") -_PRETTY = PrettyLogger(_LOGGER) - - -def _extract_credentials(file_path: Optional[str]) -> UserPassAuthenticator: - if not file_path: - return UserPassAuthenticator("KIT ILIAS Shibboleth", None, None) - - if not Path(file_path).exists(): - _PRETTY.error("Credential file does not exist") - sys.exit(1) - - with open(file_path, "r") as file: - first_line = file.read().splitlines()[0] - read_name, *read_password = first_line.split(":", 1) - - name = read_name if read_name else None - password = read_password[0] if read_password else None - return UserPassAuthenticator("KIT ILIAS Shibboleth", username=name, password=password) - - -def _resolve_remote_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution: - return FileConflictResolution.DESTROY_EXISTING - - -def _resolve_local_first(_path: PurePath, _conflict: ConflictType) -> FileConflictResolution: - return FileConflictResolution.KEEP_EXISTING - - -def _resolve_no_delete(_path: PurePath, conflict: ConflictType) -> FileConflictResolution: - # Update files - if conflict == ConflictType.FILE_OVERWRITTEN: - return FileConflictResolution.DESTROY_EXISTING - if conflict == ConflictType.MARKED_FILE_OVERWRITTEN: - return FileConflictResolution.DESTROY_EXISTING - # But do not delete them - return FileConflictResolution.KEEP_EXISTING - - 
-def main() -> None: - enable_logging(name="sync_url") - - parser = argparse.ArgumentParser() - parser.add_argument("--test-run", action="store_true") - parser.add_argument('-c', '--cookies', nargs='?', default=None, help="File to store cookies in") - parser.add_argument('-u', '--username', nargs='?', default=None, help="Username for Ilias") - parser.add_argument('-p', '--password', nargs='?', default=None, help="Password for Ilias") - parser.add_argument('--credential-file', nargs='?', default=None, - help="Path to a file containing credentials for Ilias. The file must have " - "one line in the following format: ':'") - parser.add_argument("-k", "--keyring", action="store_true", - help="Use the system keyring service for authentication") - parser.add_argument('--no-videos', action="store_true", help="Don't download videos") - parser.add_argument('--local-first', action="store_true", - help="Don't prompt for confirmation, keep existing files") - parser.add_argument('--remote-first', action="store_true", - help="Don't prompt for confirmation, delete and overwrite local files") - parser.add_argument('--no-delete', action="store_true", - help="Don't prompt for confirmation, overwrite local files, don't delete") - parser.add_argument('url', help="URL to the course page") - parser.add_argument('folder', nargs='?', default=None, help="Folder to put stuff into") - args = parser.parse_args() - - cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None) - session = cookie_jar.create_session() - - if args.keyring: - if not args.username: - _PRETTY.error("Keyring auth selected but no --username passed!") - return - inner_auth: UserPassAuthenticator = KeyringAuthenticator( - "KIT ILIAS Shibboleth", username=args.username, password=args.password - ) - else: - inner_auth = _extract_credentials(args.credential_file) - - username, password = inner_auth.get_credentials() - authenticator = KitShibbolethAuthenticator(inner_auth) - - url = urlparse(args.url) - - crawler 
= IliasCrawler(url.scheme + '://' + url.netloc, session, - authenticator, lambda x, y: True) - - cookie_jar.load_cookies() - - if args.folder is None: - element_name = crawler.find_element_name(args.url) - if not element_name: - print("Error, could not get element name. Please specify a folder yourself.") - return - folder = sanitize_windows_path(Path(element_name.replace("/", "-").replace("\\", "-"))) - cookie_jar.save_cookies() - else: - folder = Path(args.folder) - - # files may not escape the pferd_root with relative paths - # note: Path(Path.cwd, Path(folder)) == Path(folder) if it is an absolute path - pferd_root = Path(Path.cwd(), Path(folder)).parent - # Folder might be a *PurePath* at this point - target = Path(folder).resolve().name - pferd = Pferd(pferd_root, test_run=args.test_run) - - def dir_filter(_: Path, element: IliasElementType) -> bool: - if args.no_videos: - return element not in [IliasElementType.VIDEO_FILE, IliasElementType.VIDEO_FOLDER] - return True - - if args.local_first: - file_confilict_resolver: FileConflictResolver = _resolve_local_first - elif args.no_delete: - file_confilict_resolver = _resolve_no_delete - elif args.remote_first: - file_confilict_resolver = _resolve_remote_first - else: - file_confilict_resolver = resolve_prompt_user - - pferd.enable_logging() - - # fetch - pferd.ilias_kit_folder( - target=target, - full_url=args.url, - cookies=args.cookies, - dir_filter=dir_filter, - username=username, - password=password, - file_conflict_resolver=file_confilict_resolver, - transform=sanitize_windows_path - ) - - pferd.print_summary() - - -if __name__ == "__main__": - main() From d44f6966c28ecfe87e9e9d7ae9c65382418db664 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 May 2021 16:22:11 +0200 Subject: [PATCH 232/524] Log authentication attempts in HTTP crawler --- PFERD/crawl/http_crawler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index 
facc2ba..45f9df2 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -67,12 +67,18 @@ class HttpCrawler(Crawler): the request was made. This ensures that authentication is not performed needlessly. """ async with self._authentication_lock: + log.explain_topic("Authenticating") # Another thread successfully called authenticate in-between # We do not want to perform auth again, so we return here. We can # assume the other thread suceeded as authenticate will throw an error # if it failed and aborts the crawl process. if caller_auth_id != self._authentication_id: + log.explain( + "Authentication skipped due to auth id mismatch." + "A previous authentication beat us to the race." + ) return + log.explain("Calling crawler-specific authenticate") await self._authenticate() self._authentication_id += 1 # Saving the cookies after the first auth ensures we won't need to re-authenticate From 342076ee0e6da7b96e26da9caa6ef8c0b7f7bf70 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 May 2021 16:22:51 +0200 Subject: [PATCH 233/524] Handle exercise detail containers in ILIAS html parser --- PFERD/crawl/ilias/kit_ilias_html.py | 65 ++++++++++++++++++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 1 + 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 61df57a..36da7d4 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -16,6 +16,7 @@ TargetType = Union[str, int] class IliasElementType(Enum): EXERCISE = "exercise" + EXERCISE_FILES = "exercise_files" # own submitted files FILE = "file" FOLDER = "folder" FORUM = "forum" @@ -197,6 +198,43 @@ class IliasPage: return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) def _find_exercise_entries(self) -> List[IliasPageElement]: + if self._soup.find(id="tab_submission"): + log.explain("Found submission tab. 
This is an exercise detail page") + return self._find_exercise_entries_detail_page() + log.explain("Found no submission tab. This is an exercise root page") + return self._find_exercise_entries_root_page() + + def _find_exercise_entries_detail_page(self) -> List[IliasPageElement]: + results: List[IliasPageElement] = [] + + # Find all download links in the container (this will contain all the files) + download_links: List[Tag] = self._soup.findAll( + name="a", + # download links contain the given command class + attrs={"href": lambda x: x and "cmd=download" in x}, + text="Download" + ) + + for link in download_links: + parent_row: Tag = link.findParent("tr") + children: List[Tag] = parent_row.findChildren("td") + + # + # 0 1 2 3 4 + name = _sanitize_path_name(children[1].getText().strip()) + date = demangle_date(children[3].getText().strip()) + + log.explain(f"Found exercise detail entry {name!r}") + results.append(IliasPageElement( + IliasElementType.FILE, + self._abs_url_from_link(link), + name, + date + )) + + return results + + def _find_exercise_entries_root_page(self) -> List[IliasPageElement]: results: List[IliasPageElement] = [] # Each assignment is in an accordion container @@ -205,6 +243,8 @@ class IliasPage: for container in assignment_containers: # Fetch the container name out of the header to use it in the path container_name = container.select_one(".ilAssignmentHeader").getText().strip() + log.explain(f"Found exercise container {container_name!r}") + # Find all download links in the container (this will contain all the files) files: List[Tag] = container.findAll( name="a", @@ -213,8 +253,6 @@ class IliasPage: text="Download" ) - log.explain(f"Found exercise container {container_name!r}") - # Grab each file as you now have the link for file_link in files: # Two divs, side by side. Left is the name, right is the link ==> get left @@ -231,6 +269,25 @@ class IliasPage: None # We do not have any timestamp )) + # Find all links to file listings (e.g. 
"Submitted Files" for groups) + file_listings: List[Tag] = container.findAll( + name="a", + # download links contain the given command class + attrs={"href": lambda x: x and "cmdClass=ilexsubmissionfilegui" in x} + ) + + # Add each listing as a new + for listing in file_listings: + file_name = _sanitize_path_name(listing.getText().strip()) + url = self._abs_url_from_link(listing) + log.explain(f"Found exercise detail {file_name!r} at {url}") + results.append(IliasPageElement( + IliasElementType.EXERCISE_FILES, + url, + container_name + "/" + file_name, + None # we do not have any timestamp + )) + return results def _find_normal_entries(self) -> List[IliasPageElement]: @@ -349,7 +406,7 @@ class IliasPage: if found_parent is None: _unexpected_html_warning() - log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url!r}") + log.warn_contd(f"Tried to figure out element type, but did not find an icon for {url}") return None # Find the small descriptive icon to figure out the type @@ -357,7 +414,7 @@ class IliasPage: if img_tag is None: _unexpected_html_warning() - log.warn_contd(f"Tried to figure out element type, but did not find an image for {url!r}") + log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}") return None if "opencast" in str(img_tag["alt"]).lower(): diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index d488974..11b27d1 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -61,6 +61,7 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection): _DIRECTORY_PAGES: Set[IliasElementType] = set([ IliasElementType.EXERCISE, + IliasElementType.EXERCISE_FILES, IliasElementType.FOLDER, IliasElementType.MEETING, IliasElementType.VIDEO_FOLDER, From 492ec6a93239ffc1924c7a56f527c346224764f1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 May 2021 16:32:29 +0200 Subject: [PATCH 234/524] Detect and 
skip ILIAS tests --- PFERD/crawl/ilias/kit_ilias_html.py | 9 ++++++--- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 ++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 36da7d4..7bb7084 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -17,6 +17,7 @@ TargetType = Union[str, int] class IliasElementType(Enum): EXERCISE = "exercise" EXERCISE_FILES = "exercise_files" # own submitted files + TEST = "test" # an online test. Will be ignored currently. FILE = "file" FOLDER = "folder" FORUM = "forum" @@ -373,9 +374,8 @@ class IliasPage: if "target=file_" in parsed_url.query: return IliasElementType.FILE - # Skip forums - if "cmd=showThreads" in parsed_url.query: - return IliasElementType.FORUM + # TODO: Match based on CMD_CLASS or icon? The folder_like check looks at the icon, + # but we could also match the command class. I am not sure what's more stable. # Everything with a ref_id can *probably* be opened to reveal nested things # video groups, directories, exercises, etc @@ -432,6 +432,9 @@ class IliasPage: if str(img_tag["src"]).endswith("sess.svg"): return IliasElementType.MEETING + if str(img_tag["src"]).endswith("icon_tst.svg"): + return IliasElementType.TEST + return IliasElementType.FOLDER @staticmethod diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 11b27d1..0bd3202 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -274,6 +274,10 @@ class KitIliasWebCrawler(HttpCrawler): log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Forums are not supported") log.explain("Answer: No") + elif element.type == IliasElementType.TEST: + log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") + log.explain("Tests contain no relevant files") + log.explain("Answer: No") elif element.type == 
IliasElementType.LINK: await self._download_link(element, element_path) elif element.type == IliasElementType.VIDEO: From 86ba47541b9217b70f686dc1e7ff84aec7a45325 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 24 May 2021 16:53:50 +0200 Subject: [PATCH 235/524] Fix cookie loading and saving --- PFERD/crawl/http_crawler.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index 45f9df2..f0370a3 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -38,7 +38,6 @@ class HttpCrawler(Crawler): self._cookie_jar_path = self._output_dir.resolve(self.COOKIE_FILE) self._shared_cookie_jar_paths: Optional[List[Path]] = None self._shared_auth = shared_auth - self._current_cookie_jar: Optional[aiohttp.CookieJar] = None self._output_dir.register_reserved(self.COOKIE_FILE) @@ -106,6 +105,7 @@ class HttpCrawler(Crawler): def _load_cookies(self) -> None: log.explain_topic("Loading cookies") + cookie_jar_path: Optional[Path] = None if self._shared_cookie_jar_paths is None: @@ -132,32 +132,29 @@ class HttpCrawler(Crawler): log.explain(f"Loading cookies from {fmt_real_path(cookie_jar_path)}") try: - self._current_cookie_jar = aiohttp.CookieJar() - self._current_cookie_jar.load(cookie_jar_path) + self._cookie_jar.load(cookie_jar_path) except Exception as e: log.explain("Failed to load cookies") log.explain(str(e)) def _save_cookies(self) -> None: log.explain_topic("Saving cookies") - if not self._current_cookie_jar: - log.explain("No cookie jar, save aborted") - return try: log.explain(f"Saving cookies to {fmt_real_path(self._cookie_jar_path)}") - self._current_cookie_jar.save(self._cookie_jar_path) + self._cookie_jar.save(self._cookie_jar_path) except Exception as e: log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}") log.warn(str(e)) async def run(self) -> None: self._request_count = 0 + self._cookie_jar = aiohttp.CookieJar() self._load_cookies() 
async with aiohttp.ClientSession( headers={"User-Agent": f"{NAME}/{VERSION}"}, - cookie_jar=self._current_cookie_jar, + cookie_jar=self._cookie_jar, timeout=ClientTimeout(total=self._http_timeout) ) as session: self.session = session From d8bd1f518ab25502299b44c84375261e6a4fc888 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 24 May 2021 15:43:53 +0200 Subject: [PATCH 236/524] Set up build and release workflow --- .github/workflows/build-and-release.yml | 78 +++++++++++++++++++++++++ scripts/setup | 11 +++- 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build-and-release.yml diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml new file mode 100644 index 0000000..565c4e3 --- /dev/null +++ b/.github/workflows/build-and-release.yml @@ -0,0 +1,78 @@ +name: build-and-release + +on: push + +defaults: + run: + shell: bash + +jobs: + + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python: ["3.8"] + steps: + + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + + - name: Set up project + if: matrix.os != 'windows-latest' + run: ./scripts/setup + + - name: Set up project on windows + if: matrix.os == 'windows-latest' + # For some reason, `pip install --upgrade pip` doesn't work on + # 'windows-latest'. The installed pip version works fine however. 
+ run: ./scripts/setup --no-pip + + - name: Run checks + run: ./scripts/check + + - name: Build + run: ./scripts/build + + - name: Rename binary + # Glob in source location because on windows pyinstaller creates a file + # named "pferd.exe" + run: mv dist/pferd* dist/pferd-${{ matrix.os }} + + - name: Upload binary + uses: actions/upload-artifact@v2 + with: + name: Binaries + path: dist/pferd-${{ matrix.os }} + + release: + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/v') + needs: build + steps: + + - name: Download binaries + uses: actions/download-artifact@v2 + with: + name: Binaries + + - name: Rename binaries + run: | + mv pferd-ubuntu-latest pferd-linux + mv pferd-windows-latest pferd-windows.exe + mv pferd-macos-latest pferd-mac + + - name: Create release + uses: softprops/action-gh-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + files: | + pferd-linux + pferd-windows.exe + pferd-mac diff --git a/scripts/setup b/scripts/setup index 46fe2f8..b48fb1a 100755 --- a/scripts/setup +++ b/scripts/setup @@ -2,7 +2,16 @@ set -e -pip install --upgrade pip setuptools +# Updating pip and setuptools because some older versions don't recognize the +# project setup correctly +if [[ $1 != '--no-pip' ]]; then + pip install --upgrade pip +fi +pip install --upgrade setuptools + +# Installing PFERD itself pip install --editable . 
+ +# Installing various tools pip install --upgrade mypy flake8 autopep8 isort pip install --upgrade pyinstaller From 1739c5409158663fb27b9ed8bd1d595f8d9c77ec Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 24 May 2021 17:50:17 +0200 Subject: [PATCH 237/524] Add checklist for releasing new versions --- DEV.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/DEV.md b/DEV.md index 212cec8..d530d1f 100644 --- a/DEV.md +++ b/DEV.md @@ -24,6 +24,9 @@ installs PFERD via `pip install --editable .`, which means that you can just run `--editable`, there is no need to re-run `pip install` when the source code is changed. +If you get any errors because pip can't update itself, try running +`./scripts/setup --no-pip` instead of `./scripts/setup`. + For more details, see [this part of the Python Tutorial][venv-tut] and [this section on "development mode"][ppug-dev]. @@ -50,3 +53,33 @@ that `./scripts/check` returns no warnings and the code has been run through `./scripts/format`. In your first PR, please add your name to the `LICENSE` file. + +## Releasing a new version + +1. Update the version number in `PFERD/version.py` +2. Update `CHANGELOG.md` +3. Commit changes to `master` with message `Bump version to ` (e. g. `Bump version to 3.2.5`) +4. Create annotated tag named `v` (e. g. `v3.2.5`) + - Copy changes from changelog + - Remove `#` symbols (which git would interpret as comments) + - As the first line, add `Version - ` (e. g. `Version 3.2.5 - 2021-05-24`) + - Leave the second line empty +5. Fast-forward `latest` to `master` +6. 
Push `master`, `latest` and the new tag + +Example tag annotation: +``` +Version 3.2.5 - 2021-05-24 + +Added +- Support for concurrent downloads +- Support for proper config files +- This changelog + +Changed +- Rewrote almost everything +- Redesigned CLI + +Removed +- Backwards compatibility with 2.x +``` From 9ce20216b5f11949be7c818c3f78d956bb5e7162 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 May 2021 18:32:18 +0200 Subject: [PATCH 238/524] Do not set a timeout for whole HTTP request Downloads might take longer! --- PFERD/crawl/http_crawler.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index f0370a3..177972b 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -155,7 +155,15 @@ class HttpCrawler(Crawler): async with aiohttp.ClientSession( headers={"User-Agent": f"{NAME}/{VERSION}"}, cookie_jar=self._cookie_jar, - timeout=ClientTimeout(total=self._http_timeout) + timeout=ClientTimeout( + # 30 minutes. No download in the history of downloads was longer than 30 minutes. + # This is enough to transfer a 600 MB file over a 3 Mib/s connection. + # Allowing an arbitrary value could be annoying for overnight batch jobs + total=15 * 60, + connect=self._http_timeout, + sock_connect=self._http_timeout, + sock_read=self._http_timeout, + ) ) as session: self.session = session try: From 85f89a7ff36aba8a82c94226c6829813a7b26e9e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 May 2021 18:53:00 +0200 Subject: [PATCH 239/524] Interpret accordions and expandable headers as virtual folders This allows us to find a file named "Test" in an accordion "Acc" as "Acc/Test". 
--- PFERD/crawl/ilias/kit_ilias_html.py | 50 +++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 7bb7084..43d66b5 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -299,7 +299,13 @@ class IliasPage: for link in links: abs_url = self._abs_url_from_link(link) - element_name = _sanitize_path_name(link.getText()) + parents = self._find_upwards_folder_hierarchy(link) + + if parents: + element_name = "/".join(parents) + "/" + _sanitize_path_name(link.getText()) + else: + element_name = _sanitize_path_name(link.getText()) + element_type = self._find_type_from_link(element_name, link, abs_url) description = self._find_link_description(link) @@ -318,6 +324,47 @@ class IliasPage: return result + def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]: + """ + Interprets accordions and expandable blocks as virtual folders and returns them + in order. This allows us to find a file named "Test" in an accordion "Acc" as "Acc/Test" + """ + found_titles = [] + + outer_accordion_content: Optional[Tag] = None + + parents: List[Tag] = list(tag.parents) + for parent in parents: + if not parent.get("class"): + continue + + # ILIAS has proper accordions and weird blocks that look like normal headings, + # but some JS later transforms them into an accordion. + + # This is for these weird JS-y blocks + if "ilContainerItemsContainer" in parent.get("class"): + # I am currently under the impression that *only* those JS blocks have an + # ilNoDisplay class. 
+ if "ilNoDisplay" not in parent.get("class"): + continue + prev: Tag = parent.findPreviousSibling("div") + if "ilContainerBlockHeader" in prev.get("class"): + found_titles.append(prev.find("h3").getText().strip()) + + # And this for real accordions + if "il_VAccordionContentDef" in parent.get("class"): + outer_accordion_content = parent + break + + if outer_accordion_content: + accordion_tag: Tag = outer_accordion_content.parent + head_tag: Tag = accordion_tag.find(attrs={ + "class": lambda x: x and "ilc_va_ihead_VAccordIHead" in x + }) + found_titles.append(head_tag.getText().strip()) + + return [_sanitize_path_name(x) for x in reversed(found_titles)] + def _find_link_description(self, link: Tag) -> Optional[str]: tile: Tag = link.findParent("div", {"class": lambda x: x and "il_ContainerListItem" in x}) if not tile: @@ -353,7 +400,6 @@ class IliasPage: modification_date = demangle_date(modification_date_str) # Grab the name from the link text - name = _sanitize_path_name(link_element.getText()) full_path = name + "." + file_type log.explain(f"Found file {full_path!r}") From c33de233dcfd9f943cbbaf1deb5cb65871b5bd67 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 24 May 2021 20:08:49 +0200 Subject: [PATCH 240/524] Add script for releasing new versions --- CHANGELOG.md | 2 +- DEV.md | 4 ++ scripts/bump-version | 111 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 1 deletion(-) create mode 100755 scripts/bump-version diff --git a/CHANGELOG.md b/CHANGELOG.md index 14966d7..519c046 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [Unreleased] +## Unreleased ### Added - Support for concurrent downloads diff --git a/DEV.md b/DEV.md index d530d1f..f577b93 100644 --- a/DEV.md +++ b/DEV.md @@ -56,6 +56,10 @@ In your first PR, please add your name to the `LICENSE` file. ## Releasing a new version +This section describes the steps required to release a new version of PFERD. +Usually, they don't need to performed manually and `scripts/bump-version` can be +used instead. + 1. Update the version number in `PFERD/version.py` 2. Update `CHANGELOG.md` 3. Commit changes to `master` with message `Bump version to ` (e. g. `Bump version to 3.2.5`) diff --git a/scripts/bump-version b/scripts/bump-version new file mode 100755 index 0000000..4479ef8 --- /dev/null +++ b/scripts/bump-version @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 + +import argparse +import time +import re +from subprocess import run + + +def load_changelog(): + with open("CHANGELOG.md") as f: + return list(f) + + +def extract_changes(lines): + lines = iter(lines) + changes = [] + + # Find "Unreleased" section + for line in lines: + if line.strip() == "## Unreleased": + break + next(lines) + + # Read all lines from that section + for line in lines: + if line.startswith("## "): + # Found the beginning of the next section + break + elif line.startswith("### "): + # Found a heading in the current section + # Remove "#" symbols so git doesn't interpret the line as a comment later + changes.append(line[4:]) + else: + changes.append(line) + + # Remove trailing empty lines + while changes and not changes[-1].strip(): + changes.pop() + + return changes + + +def update_version(version): + with open("PFERD/version.py") as f: + text = f.read() + + text = re.sub(r'VERSION = ".*"', f'VERSION = "{version}"', text) + + with open("PFERD/version.py", "w") as f: + f.write(text) + + +def update_changelog(lines, version, date): + lines = iter(lines) + new_lines = [] + + # Find "Unreleased" section + for line in lines: + new_lines.append(line) + if 
line.strip() == "## Unreleased": + break + + # Add new heading below that + new_lines.append("\n") + new_lines.append(f"## {version} - {date}\n") + + # Add remaining lines + for line in lines: + new_lines.append(line) + + with open("CHANGELOG.md", "w") as f: + f.write("".join(new_lines)) + + +def commit_changes(version): + run(["git", "add", "CHANGELOG.md", "PFERD/version.py"]) + run(["git", "commit", "-m", f"Bump version to {version}"]) + + +def create_tag(version, annotation): + run(["git", "tag", "-am", annotation, f"v{version}"]) + + +def fastforward_latest(): + run(["git", "branch", "-f", "latest", "HEAD"]) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("version") + args = parser.parse_args() + + version = args.version + date = time.strftime("%Y-%m-%d") + changelog = load_changelog() + changes = extract_changes(changelog) + annotation = f"Version {version} - {date}\n\n{''.join(changes)}" + + update_version(version) + update_changelog(changelog, version, date) + commit_changes(version) + create_tag(version, annotation) + fastforward_latest() + + print() + print("Now the only thing left is to publish the changes:") + print(f" $ git push origin master latest v{version}") + + +if __name__ == "__main__": + main() From 69cb2a77340bf1dc0dbce3967876be51e9917ebc Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 11:33:45 +0200 Subject: [PATCH 241/524] Add Links option to ilias crawler This allows you to configure what type the link files should have and whether to create them at all. 
--- PFERD/crawl/ilias/file_templates.py | 40 ++++++++++++++++++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 29 +++++++++++++--- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index e9e332e..1385ba4 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -1,5 +1,8 @@ -link_template_plain = "{{link}}" -link_template_rich = """ +from enum import Enum +from typing import Optional + +_link_template_plain = "{{link}}" +_link_template_fancy = """ @@ -84,4 +87,35 @@ link_template_rich = """ -""" # noqa: E501 line too long +""".strip() # noqa: E501 line too long + +_link_template_internet_shortcut = """ +[InternetShortcut] +URL={{link}} +""".strip() + + +class Links(Enum): + IGNORE = "ignore" + PLAIN = "plain" + FANCY = "fancy" + INTERNET_SHORTCUT = "internet-shortcut" + + def template(self) -> Optional[str]: + if self == self.FANCY: + return _link_template_fancy + elif self == self.PLAIN: + return _link_template_plain + elif self == self.INTERNET_SHORTCUT: + return _link_template_internet_shortcut + elif self == self.IGNORE: + return None + raise ValueError("Missing switch case") + + @staticmethod + def from_string(string: str) -> "Links": + try: + return Links(string) + except ValueError: + raise ValueError("must be one of 'ignore', 'plain'," + " 'html', 'internet-shortcut'") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 0bd3202..283289e 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -14,7 +14,7 @@ from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import CrawlError, CrawlWarning, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection -from .file_templates import link_template_plain, link_template_rich +from .file_templates import 
Links from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement, deduplicate_element_names TargetType = Union[str, int] @@ -52,8 +52,16 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection): def link_file_redirect_delay(self) -> int: return self.s.getint("link_file_redirect_delay", fallback=-1) - def link_file_use_plaintext(self) -> bool: - return self.s.getboolean("link_file_plaintext", fallback=False) + def links(self) -> Links: + type_str: Optional[str] = self.s.get("links") + + if type_str is None: + return Links.FANCY + + try: + return Links.from_string(type_str) + except ValueError as e: + self.invalid_value("links", type_str, str(e).capitalize()) def videos(self) -> bool: return self.s.getboolean("videos", fallback=False) @@ -166,7 +174,7 @@ class KitIliasWebCrawler(HttpCrawler): self._target = section.target() self._link_file_redirect_delay = section.link_file_redirect_delay() - self._link_file_use_plaintext = section.link_file_use_plaintext() + self._links = section.links() self._videos = section.videos() async def _run(self) -> None: @@ -292,6 +300,17 @@ class KitIliasWebCrawler(HttpCrawler): raise CrawlWarning(f"Unknown element type: {element.type!r}") async def _download_link(self, element: IliasPageElement, element_path: PurePath) -> None: + log.explain_topic(f"Decision: Crawl Link {fmt_path(element_path)}") + log.explain(f"Links type is {self._links}") + + link_template_maybe = self._links.template() + if not link_template_maybe: + log.explain("Answer: No") + return + else: + log.explain("Answer: Yes") + link_template = link_template_maybe + maybe_dl = await self.download(element_path, mtime=element.mtime) if not maybe_dl: return @@ -303,7 +322,7 @@ class KitIliasWebCrawler(HttpCrawler): export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") real_url = await self._resolve_link_target(export_url) - content = link_template_plain if self._link_file_use_plaintext else link_template_rich + content = link_template content 
= content.replace("{{link}}", real_url) content = content.replace("{{name}}", element.name) content = content.replace("{{description}}", str(element.description)) From ffda4e43df80961990b65bf99c9a96f66f7566b3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 11:40:41 +0200 Subject: [PATCH 242/524] Add extension to link files --- PFERD/crawl/ilias/file_templates.py | 11 +++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 1385ba4..6f2b1cd 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -112,6 +112,17 @@ class Links(Enum): return None raise ValueError("Missing switch case") + def extension(self) -> Optional[str]: + if self == self.FANCY: + return ".html" + elif self == self.PLAIN: + return ".txt" + elif self == self.INTERNET_SHORTCUT: + return ".url" + elif self == self.IGNORE: + return None + raise ValueError("Missing switch case") + @staticmethod def from_string(string: str) -> "Links": try: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 283289e..318fa5e 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -304,12 +304,14 @@ class KitIliasWebCrawler(HttpCrawler): log.explain(f"Links type is {self._links}") link_template_maybe = self._links.template() - if not link_template_maybe: + link_extension = self._links.extension() + if not link_template_maybe or not link_extension: log.explain("Answer: No") return else: log.explain("Answer: Yes") link_template = link_template_maybe + element_path = element_path.with_name(element_path.name + link_extension) maybe_dl = await self.download(element_path, mtime=element.mtime) if not maybe_dl: From 4fefb98d719c0395a48c766b5833ed8baef913f8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 
2021 11:57:59 +0200 Subject: [PATCH 243/524] Add a wrapper to pretty-print ValueErrors in argparse parsers --- PFERD/cli/parser.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index 72abb76..f26390c 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -1,7 +1,8 @@ import argparse import configparser +from argparse import ArgumentTypeError from pathlib import Path -from typing import Any, List, Optional, Sequence, Union +from typing import Any, Callable, List, Optional, Sequence, Union from ..output_dir import OnConflict, Redownload from ..version import NAME, VERSION @@ -57,6 +58,19 @@ class BooleanOptionalAction(argparse.Action): return "--[no-]" + self.name[2:] +def show_value_error(inner: Callable[[str], Any]) -> Callable[[str], Any]: + """ + Some validation functions (like the from_string in our enums) raise a ValueError. + Argparse only pretty-prints ArgumentTypeErrors though, so we need to wrap our ValueErrors. 
+ """ + def wrapper(input: str) -> Any: + try: + return inner(input) + except ValueError as e: + raise ArgumentTypeError(e) + return wrapper + + CRAWLER_PARSER = argparse.ArgumentParser(add_help=False) CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( title="general crawler arguments", @@ -64,13 +78,13 @@ CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( ) CRAWLER_PARSER_GROUP.add_argument( "--redownload", - type=Redownload.from_string, + type=show_value_error(Redownload.from_string), metavar="OPTION", help="when to redownload a file that's already present locally" ) CRAWLER_PARSER_GROUP.add_argument( "--on-conflict", - type=OnConflict.from_string, + type=show_value_error(OnConflict.from_string), metavar="OPTION", help="what to do when local and remote files or directories differ" ) From c21ddf225b4da61787c651df39b1bb90e6815b51 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 11:58:41 +0200 Subject: [PATCH 244/524] Add a CLI option to configure ILIAS links behaviour --- PFERD/cli/command_kit_ilias_web.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index 89da390..8323c5c 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -2,7 +2,8 @@ import argparse import configparser from pathlib import Path -from .parser import CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, load_crawler +from ..crawl.ilias.file_templates import Links +from .parser import CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, load_crawler, show_value_error SUBPARSER = SUBPARSERS.add_parser( "kit-ilias-web", @@ -41,17 +42,18 @@ GROUP.add_argument( action=BooleanOptionalAction, help="use the system keyring to store and retrieve passwords" ) +GROUP.add_argument( + "--links", + type=show_value_error(Links.from_string), + metavar="OPTION", + help="how to treat external links" +) GROUP.add_argument( 
"--link-file-redirect-delay", type=int, metavar="SECONDS", help="delay before external link files redirect you to their target (-1 to disable)" ) -GROUP.add_argument( - "--link-file-plaintext", - action=BooleanOptionalAction, - help="use plain text files for external links" -) GROUP.add_argument( "--http-timeout", type=float, @@ -74,8 +76,8 @@ def load( section["auth"] = "auth:kit-ilias-web" if args.link_file_redirect_delay is not None: section["link_file_redirect_delay"] = str(args.link_file_redirect_delay) - if args.link_file_plaintext is not None: - section["link_file_plaintext"] = str(args.link_file_plaintext) + if args.links is not None: + section["links"] = str(args.links.value) if args.videos is not None: section["videos"] = str(False) if args.http_timeout is not None: From bce3dc384d82763f0836c5c236b930fb9d8ce75d Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 11:58:01 +0200 Subject: [PATCH 245/524] Deduplicate path names in crawler Also rename files so they follow the restrictions for windows file names if we're on windows. --- CONFIG.md | 2 + PFERD/crawl/crawler.py | 18 ++++++++ PFERD/crawl/local_crawler.py | 2 +- PFERD/deduplicator.py | 79 ++++++++++++++++++++++++++++++++++++ PFERD/output_dir.py | 12 +++++- PFERD/report.py | 3 ++ 6 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 PFERD/deduplicator.py diff --git a/CONFIG.md b/CONFIG.md index 7e8a717..982f4fc 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -83,6 +83,8 @@ crawlers: - `delay_between_tasks`: Time (in seconds) that the crawler should wait between subsequent tasks. Can be used as a sort of rate limit to avoid unnecessary load for the crawl target. (Default: 0.0) +- `windows_paths`: Whether PFERD should find alternative names for paths that + are invalid on Windows. (Default: `yes` on Windows, `no` otherwise) Some crawlers may also require credentials for authentication. To configure how the crawler obtains its credentials, the `auth` option is used. 
It is set to the diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index d6d4abc..8bd29ad 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -1,4 +1,5 @@ import asyncio +import os from abc import ABC, abstractmethod from datetime import datetime from pathlib import Path, PurePath @@ -8,6 +9,7 @@ from rich.markup import escape from ..auth import Authenticator from ..config import Config, Section +from ..deduplicator import Deduplicator from ..limiter import Limiter from ..logging import ProgressBar, log from ..output_dir import FileSink, FileSinkToken, OnConflict, OutputDirectory, OutputDirError, Redownload @@ -97,6 +99,10 @@ class CrawlToken(ReusableAsyncContextManager[ProgressBar]): self._limiter = limiter self._path = path + @property + def path(self) -> PurePath: + return self._path + async def _on_aenter(self) -> ProgressBar: bar_desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(self._path))}" after_desc = f"[bold cyan]Crawled[/] {escape(fmt_path(self._path))}" @@ -116,6 +122,10 @@ class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]): self._fs_token = fs_token self._path = path + @property + def path(self) -> PurePath: + return self._path + async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]: bar_desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(self._path))}" # The "Downloaded ..." 
message is printed in the output dir, not here @@ -195,6 +205,10 @@ class CrawlerSection(Section): self.invalid_value("auth", value, "No such auth section exists") return auth + def windows_paths(self) -> bool: + on_windows = os.name == "nt" + return self.s.getboolean("windows_paths", fallback=on_windows) + class Crawler(ABC): def __init__( @@ -221,12 +235,14 @@ class Crawler(ABC): task_delay=section.delay_between_tasks(), ) + self._deduplicator = Deduplicator(section.windows_paths()) self._transformer = Transformer(section.transform()) self._output_dir = OutputDirectory( config.default_section.working_dir() / section.output_dir(name), section.redownload(), section.on_conflict(), + section.windows_paths(), ) @property @@ -258,6 +274,7 @@ class Crawler(ABC): async def crawl(self, path: PurePath) -> Optional[CrawlToken]: log.explain_topic(f"Decision: Crawl {fmt_path(path)}") + path = self._deduplicator.mark(path) if self._transformer.transform(path) is None: log.explain("Answer: No") @@ -274,6 +291,7 @@ class Crawler(ABC): on_conflict: Optional[OnConflict] = None, ) -> Optional[DownloadToken]: log.explain_topic(f"Decision: Download {fmt_path(path)}") + path = self._deduplicator.mark(path) transformed_path = self._transformer.transform(path) if transformed_path is None: diff --git a/PFERD/crawl/local_crawler.py b/PFERD/crawl/local_crawler.py index 7958169..f102bc9 100644 --- a/PFERD/crawl/local_crawler.py +++ b/PFERD/crawl/local_crawler.py @@ -80,7 +80,7 @@ class LocalCrawler(Crawler): )) for child in path.iterdir(): - pure_child = pure / child.name + pure_child = cl.path / child.name tasks.append(self._crawl_path(child, pure_child)) await self.gather(tasks) diff --git a/PFERD/deduplicator.py b/PFERD/deduplicator.py new file mode 100644 index 0000000..ef62dcb --- /dev/null +++ b/PFERD/deduplicator.py @@ -0,0 +1,79 @@ +from pathlib import PurePath +from typing import Iterator, Set + +from .logging import log +from .utils import fmt_path + + +def name_variants(path: 
PurePath) -> Iterator[PurePath]: + separator = " " if " " in path.stem else "_" + i = 1 + while True: + yield path.parent / f"{path.stem}{separator}{i}{path.suffix}" + i += 1 + + +class Deduplicator: + FORBIDDEN_CHARS = '<>:"/\\|?*' + FORBIDDEN_NAMES = { + "CON", "PRN", "AUX", "NUL", + "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", + "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", + } + + def __init__(self, windows_paths: bool) -> None: + self._windows_paths = windows_paths + + self._known: Set[PurePath] = set() + + def _add(self, path: PurePath) -> None: + self._known.add(path) + + # The last parent is just "." + for parent in list(path.parents)[:-1]: + self._known.add(parent) + + def _fixup_element(self, name: str) -> str: + # For historical reasons, windows paths have some odd restrictions that + # we're trying to avoid. See: + # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file + + for char in self.FORBIDDEN_CHARS: + name = name.replace(char, "_") + + path = PurePath(name) + if path.stem in self.FORBIDDEN_NAMES: + name = f"{path.stem}_{path.suffix}" + + if name.endswith(" ") or name.endswith("."): + name += "_" + + return name + + def _fixup_for_windows(self, path: PurePath) -> PurePath: + new_path = PurePath(*[self._fixup_element(elem) for elem in path.parts]) + if new_path != path: + log.explain(f"Changed path to {fmt_path(new_path)} for windows compatibility") + return new_path + + def mark(self, path: PurePath) -> PurePath: + if self._windows_paths: + path = self._fixup_for_windows(path) + + if path not in self._known: + self._add(path) + return path + + log.explain(f"Path {fmt_path(path)} is already taken, finding a new name") + + for variant in name_variants(path): + if variant in self._known: + log.explain(f"Path {fmt_path(variant)} is taken as well") + continue + + log.explain(f"Found unused path {fmt_path(variant)}") + self._add(variant) + return variant + + # The "name_variants" 
iterator returns infinitely many paths + raise RuntimeError("Unreachable") diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 17cb772..5f65316 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -142,8 +142,17 @@ class OutputDirectory: root: Path, redownload: Redownload, on_conflict: OnConflict, + windows_paths: bool, ): - self._root = root + if windows_paths: + # Windows limits the path length to 260 for some historical reason + # If you want longer paths, you will have to add the "\\?\" prefix + # in front of your path. See: + # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation + self._root = Path("\\\\?\\" + str(root)) + else: + self._root = root + self._redownload = redownload self._on_conflict = on_conflict @@ -181,6 +190,7 @@ class OutputDirectory: raise OutputDirError(f"Forbidden segment '..' in path {fmt_path(path)}") if "." in path.parts: raise OutputDirError(f"Forbidden segment '.' in path {fmt_path(path)}") + return self._root / path def _should_download( diff --git a/PFERD/report.py b/PFERD/report.py index 619c621..4f15237 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -114,6 +114,9 @@ class Report: f.write("\n") # json.dump doesn't do this def mark_reserved(self, path: PurePath) -> None: + if path in self.marked: + raise RuntimeError("Trying to reserve an already reserved file") + self.reserved_files.add(path) def mark(self, path: PurePath) -> None: From 651b0879320500927a13f732b0bc070afbfa3ac2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 12:15:38 +0200 Subject: [PATCH 246/524] Use cl/dl deduplication mechanism for ILIAS crawler --- PFERD/crawl/ilias/kit_ilias_html.py | 52 ---------------------- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 6 +-- 2 files changed, 2 insertions(+), 56 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 43d66b5..032bb27 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ 
b/PFERD/crawl/ilias/kit_ilias_html.py @@ -567,55 +567,3 @@ def _tomorrow() -> date: def _sanitize_path_name(name: str) -> str: return name.replace("/", "-").replace("\\", "-").strip() - - -def deduplicate_element_names(elements: List[IliasPageElement]) -> List[IliasPageElement]: - """ - De-duplicates element names by appending an incrementing number to later elements: - test.pdf - test.pdf - would result in - test.pdf - test_1.pdf - - It is also space-aware: - "te st.pdf" - "te st.pdf" - would result in - "tes st.pdf" - "tes st 1.pdf" - """ - known_names = dict() - result_elements = [] - - for element in elements: - # This file is new - add it and mark its name as used - if element.name not in known_names: - known_names[element.name] = 1 - result_elements.append(element) - continue - - # This file is a duplicate. Find a suitable suffix - current_counter = known_names[element.name] - adjusted_element = _append_number(element, current_counter) - # increment the counter so the next duplicate does not conflict - known_names[element.name] += 1 - # also block the new name, so another file with the *renamed* name gets renamed as well - known_names[adjusted_element.name] = 1 - - result_elements.append(adjusted_element) - - return result_elements - - -def _append_number(element: IliasPageElement, number: int) -> IliasPageElement: - extension_index = element.name.rfind(".") - suffix = f" {number}" if " " in element.name else f"_{number}" - if extension_index < 0: - new_name = element.name + suffix - else: - new_name = element.name[:extension_index] + suffix + element.name[extension_index:] - - return IliasPageElement( - element.type, element.url, new_name, element.mtime, element.description - ) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 318fa5e..daafc12 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -15,7 +15,7 @@ from ...utils import fmt_path, 
soupify, url_set_query_param from ..crawler import CrawlError, CrawlWarning, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links -from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement, deduplicate_element_names +from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement TargetType = Union[str, int] @@ -226,7 +226,6 @@ class KitIliasWebCrawler(HttpCrawler): # Fill up our task list with the found elements await gather_elements() - elements = deduplicate_element_names(elements) tasks = [self._handle_ilias_element(PurePath("."), element) for element in elements] # And execute them @@ -253,8 +252,7 @@ class KitIliasWebCrawler(HttpCrawler): # Fill up our task list with the found elements await gather_elements() - elements = deduplicate_element_names(elements) - tasks = [self._handle_ilias_element(path, element) for element in elements] + tasks = [self._handle_ilias_element(cl.path, element) for element in elements] # And execute them await self.gather(tasks) From 16d50b6626a1727edebdf8b2d309ce4b5ab702e9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 13:31:29 +0200 Subject: [PATCH 247/524] Document why /pferd.py exists --- pferd.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pferd.py b/pferd.py index 21857f4..dfea7c2 100644 --- a/pferd.py +++ b/pferd.py @@ -1,3 +1,5 @@ +# File used by pyinstaller to create the executable + from PFERD.__main__ import main if __name__ == "__main__": From c15a1aecdfd6a12c80e243c5c12588845de3dea0 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 13:32:00 +0200 Subject: [PATCH 248/524] Rename keyring authenticator file for consistency --- PFERD/auth/__init__.py | 2 +- PFERD/auth/{keyring_authenticator.py => keyring.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename PFERD/auth/{keyring_authenticator.py => keyring.py} (100%) diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 81ec31d..04ad587 100644 
--- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -3,7 +3,7 @@ from typing import Callable, Dict from ..config import Config from .authenticator import Authenticator, AuthSection -from .keyring_authenticator import KeyringAuthenticator, KeyringAuthSection +from .keyring import KeyringAuthenticator, KeyringAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator diff --git a/PFERD/auth/keyring_authenticator.py b/PFERD/auth/keyring.py similarity index 100% rename from PFERD/auth/keyring_authenticator.py rename to PFERD/auth/keyring.py From 22c2259adbf25bfc26bb312f1f91380f1a5461da Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 14:21:12 +0200 Subject: [PATCH 249/524] Clean up authenticator exceptions - Renamed to *Error for consistency - Treating AuthError like CrawlError --- PFERD/auth/__init__.py | 2 +- PFERD/auth/authenticator.py | 12 ++++++------ PFERD/auth/keyring.py | 4 ++-- PFERD/auth/simple.py | 8 ++++---- PFERD/auth/tfa.py | 8 ++++---- PFERD/pferd.py | 4 ++-- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 04ad587..af38859 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -2,7 +2,7 @@ from configparser import SectionProxy from typing import Callable, Dict from ..config import Config -from .authenticator import Authenticator, AuthSection +from .authenticator import Authenticator, AuthError, AuthSection # noqa: F401 from .keyring import KeyringAuthenticator, KeyringAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator diff --git a/PFERD/auth/authenticator.py b/PFERD/auth/authenticator.py index 9217dcd..5f09f92 100644 --- a/PFERD/auth/authenticator.py +++ b/PFERD/auth/authenticator.py @@ -4,11 +4,11 @@ from typing import Tuple from ..config import Config, Section -class AuthLoadException(Exception): +class AuthLoadError(Exception): pass -class 
AuthException(Exception): +class AuthError(Exception): pass @@ -30,7 +30,7 @@ class Authenticator(ABC): If you are writing your own constructor for your own authenticator, make sure to call this constructor first (via super().__init__). - May throw an AuthLoadException. + May throw an AuthLoadError. """ self.name = name @@ -56,7 +56,7 @@ class Authenticator(ABC): (e. g. prompting the user). """ - raise AuthException("Invalid credentials") + raise AuthError("Invalid credentials") def invalidate_username(self) -> None: """ @@ -67,7 +67,7 @@ class Authenticator(ABC): (e. g. prompting the user). """ - raise AuthException("Invalid username") + raise AuthError("Invalid username") def invalidate_password(self) -> None: """ @@ -78,4 +78,4 @@ class Authenticator(ABC): (e. g. prompting the user). """ - raise AuthException("Invalid password") + raise AuthError("Invalid password") diff --git a/PFERD/auth/keyring.py b/PFERD/auth/keyring.py index 413c7ad..326f629 100644 --- a/PFERD/auth/keyring.py +++ b/PFERD/auth/keyring.py @@ -6,7 +6,7 @@ from ..config import Config from ..logging import log from ..utils import agetpass from ..version import NAME -from .authenticator import Authenticator, AuthException, AuthSection +from .authenticator import Authenticator, AuthError, AuthSection class KeyringAuthSection(AuthSection): @@ -53,4 +53,4 @@ class KeyringAuthenticator(Authenticator): self.invalidate_password() def invalidate_password(self) -> None: - raise AuthException("Invalid password") + raise AuthError("Invalid password") diff --git a/PFERD/auth/simple.py b/PFERD/auth/simple.py index a12c359..7fbb60b 100644 --- a/PFERD/auth/simple.py +++ b/PFERD/auth/simple.py @@ -3,7 +3,7 @@ from typing import Optional, Tuple from ..config import Config from ..logging import log from ..utils import agetpass, ainput -from .authenticator import Authenticator, AuthException, AuthSection +from .authenticator import Authenticator, AuthError, AuthSection class SimpleAuthSection(AuthSection): @@ 
-48,7 +48,7 @@ class SimpleAuthenticator(Authenticator): def invalidate_credentials(self) -> None: if self._username_fixed and self._password_fixed: - raise AuthException("Configured credentials are invalid") + raise AuthError("Configured credentials are invalid") if not self._username_fixed: self._username = None @@ -57,12 +57,12 @@ class SimpleAuthenticator(Authenticator): def invalidate_username(self) -> None: if self._username_fixed: - raise AuthException("Configured username is invalid") + raise AuthError("Configured username is invalid") else: self._username = None def invalidate_password(self) -> None: if self._password_fixed: - raise AuthException("Configured password is invalid") + raise AuthError("Configured password is invalid") else: self._password = None diff --git a/PFERD/auth/tfa.py b/PFERD/auth/tfa.py index 670626d..3efabe1 100644 --- a/PFERD/auth/tfa.py +++ b/PFERD/auth/tfa.py @@ -3,7 +3,7 @@ from typing import Tuple from ..config import Config from ..logging import log from ..utils import ainput -from .authenticator import Authenticator, AuthException, AuthSection +from .authenticator import Authenticator, AuthError, AuthSection class TfaAuthenticator(Authenticator): @@ -16,7 +16,7 @@ class TfaAuthenticator(Authenticator): super().__init__(name, section, config) async def username(self) -> str: - raise AuthException("TFA authenticator does not support usernames") + raise AuthError("TFA authenticator does not support usernames") async def password(self) -> str: async with log.exclusive_output(): @@ -24,10 +24,10 @@ class TfaAuthenticator(Authenticator): return code async def credentials(self) -> Tuple[str, str]: - raise AuthException("TFA authenticator does not support usernames") + raise AuthError("TFA authenticator does not support usernames") def invalidate_username(self) -> None: - raise AuthException("TFA authenticator does not support usernames") + raise AuthError("TFA authenticator does not support usernames") def invalidate_password(self) 
-> None: pass diff --git a/PFERD/pferd.py b/PFERD/pferd.py index bed7c66..dbb8983 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -3,7 +3,7 @@ from typing import Dict, List, Optional from rich.markup import escape -from .auth import AUTHENTICATORS, Authenticator +from .auth import AUTHENTICATORS, Authenticator, AuthError from .config import Config, ConfigOptionError from .crawl import CRAWLERS, Crawler, CrawlError, KitIliasWebCrawler from .logging import log @@ -117,7 +117,7 @@ class Pferd: try: await crawler.run() - except CrawlError as e: + except (CrawlError, AuthError) as e: log.error(str(e)) except Exception: log.unexpected_exception() From eb8b91581386f4d9dbd14b685c875f3376b29162 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 14:21:52 +0200 Subject: [PATCH 250/524] Fix path prefix on windows Previously, the path prefix was only set if "windows_paths" was true, regardless of OS. Now the path prefix is always set on windows and never set on other OSes. --- PFERD/crawl/crawler.py | 1 - PFERD/output_dir.py | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index 8bd29ad..420d088 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -242,7 +242,6 @@ class Crawler(ABC): config.default_section.working_dir() / section.output_dir(name), section.redownload(), section.on_conflict(), - section.windows_paths(), ) @property diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 5f65316..fa7babe 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -142,10 +142,9 @@ class OutputDirectory: root: Path, redownload: Redownload, on_conflict: OnConflict, - windows_paths: bool, ): - if windows_paths: - # Windows limits the path length to 260 for some historical reason + if os.name == "nt": + # Windows limits the path length to 260 for some historical reason. # If you want longer paths, you will have to add the "\\?\" prefix # in front of your path. 
See: # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation From 61430c8739b5789596a7b8da085eca3b37b3ec83 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 14:12:19 +0200 Subject: [PATCH 251/524] Overhaul config and CLI option names --- CONFIG.md | 138 ++++++++++++--------- PFERD/cli/command_kit_ilias_web.py | 53 ++++---- PFERD/cli/parser.py | 35 ++++-- PFERD/crawl/crawler.py | 40 +++--- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 17 ++- 5 files changed, 154 insertions(+), 129 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 982f4fc..2f2dbbe 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -1,10 +1,11 @@ # Config file format A config file consists of sections. A section begins with a `[section]` header, -which is followed by a list of `key = value` or `key: value` pairs. Comments -must be on their own line and start with `#` or `;`. Multiline values must be -indented beyond their key. For more details and some examples on the format, see -the [configparser documentation][1] ([basic interpolation][2] is enabled). +which is followed by a list of `key = value` pairs. Comments must be on their +own line and start with `#`. Multiline values must be indented beyond their key. +Boolean values can be `yes` or `no`. For more details and some examples on the +format, see the [configparser documentation][1] ([basic interpolation][2] is +enabled). [1]: "Supported INI File Structure" [2]: "BasicInterpolation" @@ -15,21 +16,19 @@ This section contains global configuration values. It can also be used to set default values for the other sections. - `working_dir`: The directory PFERD operates in. Set to an absolute path to - make PFERD operate the same regardless of where it is executed. All other + make PFERD operate the same regardless of where it is executed from. All other paths in the config file are interpreted relative to this path. If this path is relative, it is interpreted relative to the script's working dir. 
`~` is expanded to the current user's home directory. (Default: `.`) - `explain`: Whether PFERD should log and explain its actions and decisions in detail. (Default: `no`) -- `status`: Whether PFERD should print status updates while crawling. (Default: - `yes`) +- `status`: Whether PFERD should print status updates (like `Crawled ...`, + `Added ...`) while running a crawler. (Default: `yes`) - `report`: Whether PFERD should print a report of added, changed and deleted local files for all crawlers before exiting. (Default: `yes`) -- `share_cookies`: Whether crawlers should share cookies where applicable. By - default, crawlers are isolated and don't interact with each other. This - includes their cookies. However, in situations where multiple crawlers crawl - the same website using the same account, sharing cookies between crawlers can - make sense. (Default: `yes`) +- `share_cookies`: Whether crawlers should share cookies where applicable. For + example, some crawlers share cookies if they crawl the same website using the + same account. (Default: `yes`) ## The `crawl:*` sections @@ -42,17 +41,17 @@ courses or lecture websites. Each crawl section represents an instance of a specific type of crawler. The `type` option is used to specify the crawler type. The crawler's name is usually -used as the name for the output directory. New crawlers can be created simply by -adding a new crawl section to the config file. +used as the output directory. New crawlers can be created simply by adding a new +crawl section to the config file. Depending on a crawler's type, it may have different options. For more details, -see the type's documentation below. The following options are common to all -crawlers: +see the type's [documentation](#crawler-types) below. The following options are +common to all crawlers: -- `type`: The types are specified in [this section](#crawler-types). +- `type`: The available types are specified in [this section](#crawler-types). 
- `output_dir`: The directory the crawler synchronizes files to. A crawler will never place any files outside of this directory. (Default: the crawler's name) -- `redownload`: When to download again a file that is already present locally. +- `redownload`: When to download a file that is already present locally. (Default: `never-smart`) - `never`: If a file is present locally, it is not downloaded again. - `never-smart`: Like `never`, but PFERD tries to detect if an already @@ -62,8 +61,8 @@ crawlers: - `always-smart`: Like `always`, but PFERD tries to avoid unnecessary downloads via some (unreliable) heuristics. - `on_conflict`: What to do when the local and remote versions of a file or - directory differ. Includes the cases where a file is replaced by a directory - or a directory by a file. (Default: `prompt`) + directory differ, including when a file is replaced by a directory or a + directory by a file. (Default: `prompt`) - `prompt`: Always ask the user before overwriting or deleting local files and directories. - `local-first`: Always keep the local file or directory. Equivalent to @@ -75,14 +74,13 @@ crawlers: remote file is different. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) -- `max_concurrent_tasks`: The maximum number of concurrent tasks (such as - crawling or downloading). (Default: 1) -- `max_concurrent_downloads`: How many of those tasks can be download tasks at - the same time. Must not be greater than `max_concurrent_tasks`. When not set, - this is the same as `max_concurrent_tasks`. (Optional) -- `delay_between_tasks`: Time (in seconds) that the crawler should wait between +- `tasks`: The maximum number of concurrent tasks (such as crawling or + downloading). (Default: `1`) +- `downloads`: How many of those tasks can be download tasks at the same time. + Must not be greater than `tasks`. 
(Default: Same as `tasks`) +- `task_delay`: Time (in seconds) that the crawler should wait between subsequent tasks. Can be used as a sort of rate limit to avoid unnecessary - load for the crawl target. (Default: 0.0) + load for the crawl target. (Default: `0.0`) - `windows_paths`: Whether PFERD should find alternative names for paths that are invalid on Windows. (Default: `yes` on Windows, `no` otherwise) @@ -101,6 +99,8 @@ password = bar [crawl:something] type = some-complex-crawler auth = auth:example +on_conflict = no-delete +tasks = 3 ``` ## The `auth:*` sections @@ -109,12 +109,12 @@ Sections whose names start with `auth:` are used to configure authenticators. An authenticator provides a username and a password to one or more crawlers. Authenticators work similar to crawlers: A section represents an authenticator -instance, whose name is the rest of the section name. The type is specified by +instance whose name is the rest of the section name. The type is specified by the `type` option. Depending on an authenticator's type, it may have different options. For more -details, see the type's documentation below. The only option common to all -authenticators is `type`: +details, see the type's [documentation](#authenticator-types) below. The only +option common to all authenticators is `type`: - `type`: The types are specified in [this section](#authenticator-types). @@ -127,28 +127,47 @@ testing different setups. The various delay options are meant to make the crawler simulate a slower, network-based crawler. - `target`: Path to the local directory to crawl. (Required) -- `crawl_delay`: Maximum artificial delay (in seconds) to simulate for crawl - requests. (Default: 0.0) -- `download_delay`: Maximum artificial delay (in seconds) to simulate for - download requests. (Default: 0.0) +- `crawl_delay`: Artificial delay (in seconds) to simulate for crawl requests. 
+ (Default: `0.0`) +- `download_delay`: Artificial delay (in seconds) to simulate for download + requests. (Default: `0.0`) - `download_speed`: Download speed (in bytes per second) to simulate. (Optional) -### The `kit-ilias` crawler +### The `kit-ilias-web` crawler -This crawler crawls the KIT ILIAS instance. It performs remote calls to a poor SCC-Server, so you should be nice and use reasonable delays and concurrent requests. -- `target`: The ILIAS element to crawl. Can be: - - `desktop` if you want to crawl your personal desktop - - `` if you want to crawl the course with the given id - - `` if you want to crawl a given element by URL (preferably the permanent URL linked at the bottom of an ILIAS page) -- `tfa_auth`: Like `auth` but only used for two-factor authentication -- `link_file_redirect_delay`: PFERD will create local HTML for external links. - If this property is set to a non-negative value it configures the amount of seconds after which the local HTML - file will redirect you to the link target. -- `link_file_plain_text`: If this is set to true, PFERD will generate plain-text files containing only the link - target for external links. If this is false or not specified, PFERD will generate a neat, pretty and functional - HTML page instead. -- `videos`: If this is set to false, PFERD will not crawl or download any videos. -- `http_timeout`: The timeout for http requests +This crawler crawls the KIT ILIAS instance. + +ILIAS is not great at handling too many concurrent requests. To avoid +unnecessary load, please limit `tasks` to `1`. + +There is a spike in ILIAS usage at the beginning of lectures, so please don't +run PFERD during those times. + +If you're automatically running PFERD periodically (e. g. via cron or a systemd +timer), please randomize the start time or at least don't use the full hour. For +systemd timers, this can be accomplished using the `RandomizedDelaySec` option. +Also, please schedule the script to run in periods of low activity. 
Running the +script once per day should be fine. + +- `target`: The ILIAS element to crawl. (Required) + - `desktop`: Crawl your personal desktop + - ``: Crawl the course with the given id + - ``: Crawl a given element by URL (preferably the permanent URL linked + at the bottom of its ILIAS page) +- `auth`: Name of auth section to use for login. (Required) +- `tfa_auth`: Name of auth section to use for two-factor authentication. Only + uses the auth section's password. (Default: Anonymous `tfa` authenticator) +- `links`: How to represent external links. (Default: `fancy`) + - `ignore`: Don't download links. + - `plaintext`: A text file containing only the URL. + - `fancy`: A HTML file looking like the ILIAS link element. + - `internet-shortcut`: An internet shortcut file (`.url` file). +- `link_redirect_delay`: Time (in seconds) until `fancy` link files will + redirect to the actual URL. Set to a negative value to disable the automatic + redirect. (Default: `-1`) +- `videos`: Whether to download videos. (Default: `no`) +- `http_timeout`: The timeout (in seconds) for all HTTP requests. (Default: + `20.0`) ## Authenticator types @@ -161,21 +180,24 @@ via the terminal. - `username`: The username. (Optional) - `password`: The password. (Optional) +### The `keyring` authenticator + +This authenticator uses the system keyring to store passwords. The username can +be set directly in the config file. If the username is not specified, the user +is prompted via the terminal. If the keyring contains no entry or the entry is +incorrect, the user is prompted for a password via the terminal and the password +is stored in the keyring. + +- `username`: The username. (Optional) +- `keyring_name`: The service name PFERD uses for storing credentials. (Default: + `PFERD`) + ### The `tfa` authenticator This authenticator prompts the user on the console for a two-factor authentication token. The token is provided as password and it is not cached. 
This authenticator does not support usernames. -### The `keyring` authenticator - -This authenticator uses the system keyring to store passwords. It expects a -username in the config and will prompt *once* for the password. After that it -receives the password from the system keyring. - -- `username`: The username. (Required) -- `keyring_name`: The service name PFERD uses for storing credentials. (Optional) - ## Transformation rules Transformation rules are rules for renaming and excluding files and directories. diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index 8323c5c..ccb7134 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -11,14 +11,14 @@ SUBPARSER = SUBPARSERS.add_parser( ) GROUP = SUBPARSER.add_argument_group( - title="KIT ILIAS web-crawler arguments", + title="kit-ilias-web crawler arguments", description="arguments for the 'kit-ilias-web' crawler", ) GROUP.add_argument( "target", type=str, metavar="TARGET", - help="course id, 'desktop', or ILIAS https-URL to crawl" + help="course id, 'desktop', or ILIAS URL to crawl" ) GROUP.add_argument( "output", @@ -27,14 +27,9 @@ GROUP.add_argument( help="output directory" ) GROUP.add_argument( - "--videos", - action=BooleanOptionalAction, - help="crawl and download videos" -) -GROUP.add_argument( - "--username", + "--username", "-u", type=str, - metavar="USER_NAME", + metavar="USERNAME", help="user name for authentication" ) GROUP.add_argument( @@ -46,19 +41,24 @@ GROUP.add_argument( "--links", type=show_value_error(Links.from_string), metavar="OPTION", - help="how to treat external links" + help="how to represent external links" ) GROUP.add_argument( - "--link-file-redirect-delay", + "--link-redirect-delay", type=int, metavar="SECONDS", - help="delay before external link files redirect you to their target (-1 to disable)" + help="time before 'fancy' links redirect to to their target (-1 to disable)" ) GROUP.add_argument( - 
"--http-timeout", + "--videos", + action=BooleanOptionalAction, + help="crawl and download videos" +) +GROUP.add_argument( + "--http-timeout", "-t", type=float, metavar="SECONDS", - help="the timeout to use for HTTP requests" + help="timeout for all HTTP requests" ) @@ -66,33 +66,30 @@ def load( args: argparse.Namespace, parser: configparser.ConfigParser, ) -> None: - parser["crawl:kit-ilias-web"] = {} - section = parser["crawl:kit-ilias-web"] + parser["crawl:ilias"] = {} + section = parser["crawl:ilias"] load_crawler(args, section) section["type"] = "kit-ilias-web" section["target"] = str(args.target) section["output_dir"] = str(args.output) - section["auth"] = "auth:kit-ilias-web" - if args.link_file_redirect_delay is not None: - section["link_file_redirect_delay"] = str(args.link_file_redirect_delay) + section["auth"] = "auth:ilias" if args.links is not None: section["links"] = str(args.links.value) + if args.link_redirect_delay is not None: + section["link_redirect_delay"] = str(args.link_redirect_delay) if args.videos is not None: - section["videos"] = str(False) + section["videos"] = "yes" if args.videos else "no" if args.http_timeout is not None: section["http_timeout"] = str(args.http_timeout) - parser["auth:kit-ilias-web"] = {} - auth_section = parser["auth:kit-ilias-web"] - + parser["auth:ilias"] = {} + auth_section = parser["auth:ilias"] + auth_section["type"] = "simple" + if args.username is not None: + auth_section["username"] = args.username if args.keyring: auth_section["type"] = "keyring" - else: - auth_section["type"] = "simple" - - if args.username is not None: - auth_section["username"] = str(args.username) SUBPARSER.set_defaults(command=load) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index f26390c..4e3b425 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -77,10 +77,10 @@ CRAWLER_PARSER_GROUP = CRAWLER_PARSER.add_argument_group( description="arguments common to all crawlers", ) CRAWLER_PARSER_GROUP.add_argument( - 
"--redownload", + "--redownload", "-r", type=show_value_error(Redownload.from_string), metavar="OPTION", - help="when to redownload a file that's already present locally" + help="when to download a file that's already present locally" ) CRAWLER_PARSER_GROUP.add_argument( "--on-conflict", @@ -89,30 +89,35 @@ CRAWLER_PARSER_GROUP.add_argument( help="what to do when local and remote files or directories differ" ) CRAWLER_PARSER_GROUP.add_argument( - "--transform", "-t", + "--transform", "-T", action="append", type=str, metavar="RULE", help="add a single transformation rule. Can be specified multiple times" ) CRAWLER_PARSER_GROUP.add_argument( - "--max-concurrent-tasks", + "--tasks", "-n", type=int, metavar="N", help="maximum number of concurrent tasks (crawling, downloading)" ) CRAWLER_PARSER_GROUP.add_argument( - "--max-concurrent-downloads", + "--downloads", "-N", type=int, metavar="N", help="maximum number of tasks that may download data at the same time" ) CRAWLER_PARSER_GROUP.add_argument( - "--delay-between-tasks", + "--task-delay", "-d", type=float, metavar="SECONDS", help="time the crawler should wait between subsequent tasks" ) +CRAWLER_PARSER_GROUP.add_argument( + "--windows-paths", + action=BooleanOptionalAction, + help="whether to repair invalid paths on windows" +) def load_crawler( @@ -125,12 +130,14 @@ def load_crawler( section["on_conflict"] = args.on_conflict.value if args.transform is not None: section["transform"] = "\n" + "\n".join(args.transform) - if args.max_concurrent_tasks is not None: - section["max_concurrent_tasks"] = str(args.max_concurrent_tasks) - if args.max_concurrent_downloads is not None: - section["max_concurrent_downloads"] = str(args.max_concurrent_downloads) - if args.delay_between_tasks is not None: - section["delay_between_tasks"] = str(args.delay_between_tasks) + if args.tasks is not None: + section["tasks"] = str(args.tasks) + if args.downloads is not None: + section["downloads"] = str(args.downloads) + if args.task_delay is 
not None: + section["task_delay"] = str(args.task_delay) + if args.windows_paths is not None: + section["windows_paths"] = "yes" if args.windows_paths else "no" PARSER = argparse.ArgumentParser() @@ -200,6 +207,10 @@ def load_default_section( section["working_dir"] = str(args.working_dir) if args.explain is not None: section["explain"] = "yes" if args.explain else "no" + if args.status is not None: + section["status"] = "yes" if args.status else "no" + if args.report is not None: + section["report"] = "yes" if args.report else "no" if args.share_cookies is not None: section["share_cookies"] = "yes" if args.share_cookies else "no" diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index 420d088..321daa2 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -169,33 +169,33 @@ class CrawlerSection(Section): def transform(self) -> str: return self.s.get("transform", "") - def max_concurrent_tasks(self) -> int: - value = self.s.getint("max_concurrent_tasks", fallback=1) + def tasks(self) -> int: + value = self.s.getint("tasks", fallback=1) if value <= 0: - self.invalid_value("max_concurrent_tasks", value, - "Must be greater than 0") + self.invalid_value("tasks", value, "Must be greater than 0") return value - def max_concurrent_downloads(self) -> int: - tasks = self.max_concurrent_tasks() - value = self.s.getint("max_concurrent_downloads", fallback=None) + def downloads(self) -> int: + tasks = self.tasks() + value = self.s.getint("downloads", fallback=None) if value is None: return tasks if value <= 0: - self.invalid_value("max_concurrent_downloads", value, - "Must be greater than 0") + self.invalid_value("downloads", value, "Must be greater than 0") if value > tasks: - self.invalid_value("max_concurrent_downloads", value, - "Must not be greater than max_concurrent_tasks") + self.invalid_value("downloads", value, "Must not be greater than tasks") return value - def delay_between_tasks(self) -> float: - value = 
self.s.getfloat("delay_between_tasks", fallback=0.0) + def task_delay(self) -> float: + value = self.s.getfloat("task_delay", fallback=0.0) if value < 0: - self.invalid_value("delay_between_tasks", value, - "Must not be negative") + self.invalid_value("task_delay", value, "Must not be negative") return value + def windows_paths(self) -> bool: + on_windows = os.name == "nt" + return self.s.getboolean("windows_paths", fallback=on_windows) + def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator: value = self.s.get("auth") if value is None: @@ -205,10 +205,6 @@ class CrawlerSection(Section): self.invalid_value("auth", value, "No such auth section exists") return auth - def windows_paths(self) -> bool: - on_windows = os.name == "nt" - return self.s.getboolean("windows_paths", fallback=on_windows) - class Crawler(ABC): def __init__( @@ -230,9 +226,9 @@ class Crawler(ABC): self.error_free = True self._limiter = Limiter( - task_limit=section.max_concurrent_tasks(), - download_limit=section.max_concurrent_downloads(), - task_delay=section.delay_between_tasks(), + task_limit=section.tasks(), + download_limit=section.downloads(), + task_delay=section.task_delay(), ) self._deduplicator = Deduplicator(section.windows_paths()) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index daafc12..40db52c 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -40,18 +40,14 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection): self.invalid_value("target", target, "Should be ") def tfa_auth(self, authenticators: Dict[str, Authenticator]) -> Optional[Authenticator]: - value = self.s.get("tfa_auth") - if not value: + value: Optional[str] = self.s.get("tfa_auth") + if value is None: return None - - auth = authenticators.get(f"auth:{value}") + auth = authenticators.get(value) if auth is None: - self.invalid_value("auth", value, "No such auth section exists") + 
self.invalid_value("tfa_auth", value, "No such auth section exists") return auth - def link_file_redirect_delay(self) -> int: - return self.s.getint("link_file_redirect_delay", fallback=-1) - def links(self) -> Links: type_str: Optional[str] = self.s.get("links") @@ -63,6 +59,9 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection): except ValueError as e: self.invalid_value("links", type_str, str(e).capitalize()) + def link_redirect_delay(self) -> int: + return self.s.getint("link_redirect_delay", fallback=-1) + def videos(self) -> bool: return self.s.getboolean("videos", fallback=False) @@ -173,7 +172,7 @@ class KitIliasWebCrawler(HttpCrawler): self._base_url = "https://ilias.studium.kit.edu" self._target = section.target() - self._link_file_redirect_delay = section.link_file_redirect_delay() + self._link_file_redirect_delay = section.link_redirect_delay() self._links = section.links() self._videos = section.videos() From d905e95dbb6368678c37c09a187a4e571a818c4c Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 15:02:35 +0200 Subject: [PATCH 252/524] Allow invalidation of keyring authenticator --- PFERD/auth/keyring.py | 49 ++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/PFERD/auth/keyring.py b/PFERD/auth/keyring.py index 326f629..b63bf90 100644 --- a/PFERD/auth/keyring.py +++ b/PFERD/auth/keyring.py @@ -4,17 +4,14 @@ import keyring from ..config import Config from ..logging import log -from ..utils import agetpass +from ..utils import agetpass, ainput from ..version import NAME from .authenticator import Authenticator, AuthError, AuthSection class KeyringAuthSection(AuthSection): - def username(self) -> str: - name = self.s.get("username") - if name is None: - self.missing_value("username") - return name + def username(self) -> Optional[str]: + return self.s.get("username") def keyring_name(self) -> str: return self.s.get("keyring_name", fallback=NAME) @@ -34,23 +31,41 @@ class 
KeyringAuthenticator(Authenticator): self._password: Optional[str] = None self._keyring_name = section.keyring_name() + self._password_invalidated = False + self._username_fixed = section.username() is not None + async def credentials(self) -> Tuple[str, str]: - if self._password is not None: - return self._username, self._password - - password = keyring.get_password(self._keyring_name, self._username) - - if not password: + # Request the username + if self._username is None: async with log.exclusive_output(): - password = await agetpass("Password: ") - keyring.set_password(self._keyring_name, self._username, password) + self._username = await ainput("Username: ") - self._password = password + # First try looking it up in the keyring. + # Do not look it up if it was invalidated - we want to re-prompt in this case + if self._password is None and not self._password_invalidated: + self._password = keyring.get_password(self._keyring_name, self._username) - return self._username, password + # If that fails it wasn't saved in the keyring - we need to + # read it from the user and store it + if self._password is None: + async with log.exclusive_output(): + self._password = await agetpass("Password: ") + keyring.set_password(self._keyring_name, self._username, self._password) + + self._password_invalidated = False + return self._username, self._password def invalidate_credentials(self) -> None: + if not self._username_fixed: + self.invalidate_username() self.invalidate_password() + def invalidate_username(self) -> None: + if self._username_fixed: + raise AuthError("Configured username is invalid") + else: + self._username = None + def invalidate_password(self) -> None: - raise AuthError("Invalid password") + self._password = None + self._password_invalidated = True From 0096a0c07779d9ccd054d0dd1b98f045d2e6c13d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 15:11:33 +0200 Subject: [PATCH 253/524] Remove section and config parameter from Authenticator 
--- PFERD/auth/__init__.py | 6 +++--- PFERD/auth/authenticator.py | 6 ++---- PFERD/auth/keyring.py | 4 +--- PFERD/auth/simple.py | 4 +--- PFERD/auth/tfa.py | 7 ++----- 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index af38859..6e7fd3a 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -15,9 +15,9 @@ AuthConstructor = Callable[[ AUTHENTICATORS: Dict[str, AuthConstructor] = { "simple": lambda n, s, c: - SimpleAuthenticator(n, SimpleAuthSection(s), c), + SimpleAuthenticator(n, SimpleAuthSection(s)), "tfa": lambda n, s, c: - TfaAuthenticator(n, AuthSection(s), c), + TfaAuthenticator(n), "keyring": lambda n, s, c: - KeyringAuthenticator(n, KeyringAuthSection(s), c) + KeyringAuthenticator(n, KeyringAuthSection(s)) } diff --git a/PFERD/auth/authenticator.py b/PFERD/auth/authenticator.py index 5f09f92..fe14909 100644 --- a/PFERD/auth/authenticator.py +++ b/PFERD/auth/authenticator.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Tuple -from ..config import Config, Section +from ..config import Section class AuthLoadError(Exception): @@ -19,9 +19,7 @@ class AuthSection(Section): class Authenticator(ABC): def __init__( self, - name: str, - section: AuthSection, - config: Config, + name: str ) -> None: """ Initialize an authenticator from its name and its section in the config diff --git a/PFERD/auth/keyring.py b/PFERD/auth/keyring.py index b63bf90..c7ca2c2 100644 --- a/PFERD/auth/keyring.py +++ b/PFERD/auth/keyring.py @@ -2,7 +2,6 @@ from typing import Optional, Tuple import keyring -from ..config import Config from ..logging import log from ..utils import agetpass, ainput from ..version import NAME @@ -23,9 +22,8 @@ class KeyringAuthenticator(Authenticator): self, name: str, section: KeyringAuthSection, - config: Config, ) -> None: - super().__init__(name, section, config) + super().__init__(name) self._username = section.username() self._password: Optional[str] = 
None diff --git a/PFERD/auth/simple.py b/PFERD/auth/simple.py index 7fbb60b..d2f4123 100644 --- a/PFERD/auth/simple.py +++ b/PFERD/auth/simple.py @@ -1,6 +1,5 @@ from typing import Optional, Tuple -from ..config import Config from ..logging import log from ..utils import agetpass, ainput from .authenticator import Authenticator, AuthError, AuthSection @@ -19,9 +18,8 @@ class SimpleAuthenticator(Authenticator): self, name: str, section: SimpleAuthSection, - config: Config, ) -> None: - super().__init__(name, section, config) + super().__init__(name) self._username = section.username() self._password = section.password() diff --git a/PFERD/auth/tfa.py b/PFERD/auth/tfa.py index 3efabe1..28ba150 100644 --- a/PFERD/auth/tfa.py +++ b/PFERD/auth/tfa.py @@ -1,19 +1,16 @@ from typing import Tuple -from ..config import Config from ..logging import log from ..utils import ainput -from .authenticator import Authenticator, AuthError, AuthSection +from .authenticator import Authenticator, AuthError class TfaAuthenticator(Authenticator): def __init__( self, name: str, - section: AuthSection, - config: Config, ) -> None: - super().__init__(name, section, config) + super().__init__(name) async def username(self) -> str: raise AuthError("TFA authenticator does not support usernames") From 486699cef3038e3cb00371383af22162e28631c4 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 15:11:52 +0200 Subject: [PATCH 254/524] Create anonymous TFA authenticator in ilias crawler This ensures that *some* TFA authenticator is always present when authenticating, even if none is specified in the config. The TfaAuthenticator does not depend on any configured values, so it can be created on-demand. 
--- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 40db52c..6013d77 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -7,7 +7,7 @@ import aiohttp from aiohttp import hdrs from bs4 import BeautifulSoup, Tag -from ...auth import Authenticator +from ...auth import Authenticator, TfaAuthenticator from ...config import Config from ...logging import ProgressBar, log from ...output_dir import FileSink, Redownload @@ -523,7 +523,7 @@ class KitShibbolethLogin: soup: BeautifulSoup ) -> BeautifulSoup: if not self._tfa_auth: - raise RuntimeError("No 'tfa_auth' present but you use two-factor authentication!") + self._tfa_auth = TfaAuthenticator("ilias-anon-tfa") tfa_token = await self._tfa_auth.password() From 980578d05ae08103453a7683bad4312c56f5f7c5 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 15:12:07 +0200 Subject: [PATCH 255/524] Avoid downloading in some cases Depending on how on_conflict is set, we can determine a few situations where downloading is never necessary. --- PFERD/output_dir.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index fa7babe..ea3a3e0 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -197,13 +197,30 @@ class OutputDirectory: local_path: Path, heuristics: Heuristics, redownload: Redownload, + on_conflict: OnConflict, ) -> bool: - # If we don't have a *file* at the local path, we'll always redownload - # since we know that the remote is different from the local files. This - # includes the case where no local file exists. 
- if not local_path.is_file(): + if not local_path.exists(): log.explain("No corresponding file present locally") - # TODO Don't download if on_conflict is LOCAL_FIRST or NO_DELETE + return True + + if on_conflict == OnConflict.LOCAL_FIRST: + # Whatever is here, it will never be overwritten, so we don't need + # to download the file. + log.explain("Conflict resolution is 'local-first' and path exists") + return False + + if not local_path.is_file(): + # We know that there is *something* here that's not a file. + log.explain("Non-file (probably a directory) present locally") + + # If on_conflict is LOCAL_FIRST or NO_DELETE, we know that it would + # never be overwritten. It also doesn't have any relevant stats to + # update. This means that we don't have to download the file + # because we'd just always throw it away again. + if on_conflict in {OnConflict.LOCAL_FIRST, OnConflict.NO_DELETE}: + log.explain(f"Conflict resolution is {on_conflict.value!r}") + return False + return True log.explain(f"Redownload policy is {redownload.value}") @@ -363,7 +380,7 @@ class OutputDirectory: self._report.mark(path) - if not self._should_download(local_path, heuristics, redownload): + if not self._should_download(local_path, heuristics, redownload, on_conflict): return None # Detect and solve local-dir-remote-file conflict From edb52a989eb61bac99800950d4a31e604ebd96ae Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 15:35:36 +0200 Subject: [PATCH 256/524] Print report even if exiting due to Ctrl+C --- PFERD/__main__.py | 22 +++++++++--- PFERD/config.py | 2 ++ PFERD/pferd.py | 92 +++++++++++++++++++++++------------------------ 3 files changed, 63 insertions(+), 53 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 0fbce59..5fd9447 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -107,15 +107,22 @@ def main() -> None: try: pferd = Pferd(config, args.crawler) - asyncio.run(pferd.run()) - except (PferdLoadError, ConfigOptionError) as e: + except 
PferdLoadError as e: log.unlock() log.error(str(e)) exit(1) + + error = False + try: + asyncio.run(pferd.run()) + except ConfigOptionError as e: + log.unlock() + log.error(str(e)) + error = True except RuleParseError as e: log.unlock() e.pretty_print() - exit(1) + error = True except KeyboardInterrupt: log.unlock() log.explain_topic("Interrupted, exiting immediately") @@ -123,9 +130,14 @@ def main() -> None: log.explain("Temporary files are not cleaned up") # TODO Clean up tmp files # And when those files *do* actually get cleaned up properly, - # reconsider what exit code to use here. - exit(1) + # reconsider if this should be an error + error = True except Exception: log.unlock() log.unexpected_exception() + error = True + + pferd.print_report() + + if error: exit(1) diff --git a/PFERD/config.py b/PFERD/config.py index 8293331..4bfada7 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -170,6 +170,7 @@ class Config: def dump_to_stdout(self) -> None: self._parser.write(sys.stdout) + # TODO Rename to "crawl_sections" def crawler_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] for name, proxy in self._parser.items(): @@ -178,6 +179,7 @@ class Config: return result + # TODO Rename to "auth_sections" def authenticator_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] for name, proxy in self._parser.items(): diff --git a/PFERD/pferd.py b/PFERD/pferd.py index dbb8983..c0b48a7 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -15,20 +15,53 @@ class PferdLoadError(Exception): class Pferd: - def __init__(self, config: Config, crawlers_to_run: Optional[List[str]]): + def __init__(self, config: Config, cli_crawlers: Optional[List[str]]): """ May throw PferdLoadError. 
""" - if crawlers_to_run is not None and len(crawlers_to_run) != len(set(crawlers_to_run)): - raise PferdLoadError("Some crawlers were selected multiple times") - self._config = config - self._crawlers_to_run = crawlers_to_run + self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers) self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} + def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: + log.explain_topic("Deciding which crawlers to run") + crawl_sections = [name for name, _ in config.crawler_sections()] + + if cli_crawlers is None: + log.explain("No crawlers specified on CLI") + log.explain("Running all crawlers specified in config") + return crawl_sections + + if len(cli_crawlers) != len(set(cli_crawlers)): + raise PferdLoadError("Some crawlers were selected multiple times") + + log.explain("Crawlers specified on CLI") + + crawlers_to_run = [] # With crawl: prefix + unknown_names = [] # Without crawl: prefix + + for name in cli_crawlers: + section_name = f"crawl:{name}" + if section_name in crawl_sections: + log.explain(f"Crawler section named {section_name!r} exists") + crawlers_to_run.append(section_name) + else: + log.explain(f"There's no crawler section named {section_name!r}") + unknown_names.append(name) + + if unknown_names: + if len(unknown_names) == 1: + [name] = unknown_names + raise PferdLoadError(f"There is no crawler named {name!r}") + else: + names_str = ", ".join(repr(name) for name in unknown_names) + raise PferdLoadError(f"There are no crawlers named {names_str}") + + return crawlers_to_run + def _load_authenticators(self) -> None: for name, section in self._config.authenticator_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") @@ -40,15 +73,12 @@ class Pferd: authenticator = authenticator_constructor(name, section, self._config) self._authenticators[name] = authenticator - def _load_crawlers(self) -> List[str]: - names = [] 
- + def _load_crawlers(self) -> None: # Cookie sharing kit_ilias_web_paths: Dict[Authenticator, List[Path]] = {} for name, section in self._config.crawler_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") - names.append(name) crawl_type = section.get("type") crawler_constructor = CRAWLERS.get(crawl_type) @@ -62,55 +92,20 @@ class Pferd: if isinstance(crawler, KitIliasWebCrawler): crawler.share_cookies(kit_ilias_web_paths) - return names - - def _find_crawlers_to_run(self, loaded_crawlers: List[str]) -> List[str]: - log.explain_topic("Deciding which crawlers to run") - - if self._crawlers_to_run is None: - log.explain("No crawlers specified on CLI") - log.explain("Running all loaded crawlers") - return loaded_crawlers - - log.explain("Crawlers specified on CLI") - - names: List[str] = [] # With 'crawl:' prefix - unknown_names = [] # Without 'crawl:' prefix - - for name in self._crawlers_to_run: - section_name = f"crawl:{name}" - if section_name in self._crawlers: - log.explain(f"Crawler section named {section_name!r} exists") - names.append(section_name) - else: - log.explain(f"There's no crawler section named {section_name!r}") - unknown_names.append(name) - - if unknown_names: - if len(unknown_names) == 1: - [name] = unknown_names - raise PferdLoadError(f"There is no crawler named {name!r}") - else: - names_str = ", ".join(repr(name) for name in unknown_names) - raise PferdLoadError(f"There are no crawlers named {names_str}") - - return names - async def run(self) -> None: """ - May throw PferdLoadError or ConfigOptionError. + May throw ConfigOptionError. """ # These two functions must run inside the same event loop as the # crawlers, so that any new objects (like Conditions or Futures) can # obtain the correct event loop. 
self._load_authenticators() - loaded_crawlers = self._load_crawlers() - names = self._find_crawlers_to_run(loaded_crawlers) + self._load_crawlers() log.print("") - for name in names: + for name in self._crawlers_to_run: crawler = self._crawlers[name] log.print(f"[bold bright_cyan]Running[/] {escape(name)}") @@ -122,7 +117,8 @@ class Pferd: except Exception: log.unexpected_exception() - for name in names: + def print_report(self) -> None: + for name in self._crawlers_to_run: crawler = self._crawlers[name] log.report("") From f68849c65f37d7fa3949466bdd67039ed67a07ae Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 15:42:46 +0200 Subject: [PATCH 257/524] Fix rules not being parsed entirely --- PFERD/__main__.py | 16 ++++++---------- PFERD/pferd.py | 4 +++- PFERD/transformer.py | 7 +++++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 5fd9447..b7c5fa9 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -112,32 +112,28 @@ def main() -> None: log.error(str(e)) exit(1) - error = False try: asyncio.run(pferd.run()) except ConfigOptionError as e: log.unlock() log.error(str(e)) - error = True + exit(1) except RuleParseError as e: log.unlock() e.pretty_print() - error = True + exit(1) except KeyboardInterrupt: log.unlock() log.explain_topic("Interrupted, exiting immediately") log.explain("Open files and connections are left for the OS to clean up") log.explain("Temporary files are not cleaned up") + pferd.print_report() # TODO Clean up tmp files # And when those files *do* actually get cleaned up properly, - # reconsider if this should be an error - error = True + # reconsider if this should really exit with 1 + exit(1) except Exception: log.unlock() log.unexpected_exception() - error = True - - pferd.print_report() - - if error: + pferd.print_report() exit(1) diff --git a/PFERD/pferd.py b/PFERD/pferd.py index c0b48a7..434407d 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -119,7 +119,9 @@ 
class Pferd: def print_report(self) -> None: for name in self._crawlers_to_run: - crawler = self._crawlers[name] + crawler = self._crawlers.get(name) + if crawler is None: + continue # Crawler failed to load log.report("") log.report(f"[bold bright_cyan]Report[/] for {escape(name)}") diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 5a20207..0f2a3e6 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -266,6 +266,11 @@ def parse_whitespace(line: Line) -> None: line.advance() +def parse_eol(line: Line) -> None: + if line.get() is not None: + raise RuleParseError(line, "Expected end of line") + + def parse_rule(line: Line) -> Rule: # Parse left side leftindex = line.index @@ -291,6 +296,8 @@ def parse_rule(line: Line) -> Rule: else: rightpath = PurePath(right) + parse_eol(line) + # Dispatch if arrowname == "": return NormalRule(PurePath(left), rightpath) From 40144f8bd80379c39436e8dc24ba7afbf71081ed Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 15:47:09 +0200 Subject: [PATCH 258/524] Fix rule error messages --- PFERD/transformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 0f2a3e6..23844f8 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -156,8 +156,8 @@ class Line: return self._line @property - def line_nr(self) -> str: - return self._line + def line_nr(self) -> int: + return self._line_nr @property def index(self) -> int: From 5a331663e46058d3def8c3fc38af7ac075035bcf Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 15:49:06 +0200 Subject: [PATCH 259/524] Rename functions for consistency --- PFERD/config.py | 6 ++---- PFERD/pferd.py | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/PFERD/config.py b/PFERD/config.py index 4bfada7..1462d82 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -170,8 +170,7 @@ class Config: def dump_to_stdout(self) -> None: self._parser.write(sys.stdout) - # TODO Rename 
to "crawl_sections" - def crawler_sections(self) -> List[Tuple[str, SectionProxy]]: + def crawl_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] for name, proxy in self._parser.items(): if name.startswith("crawl:"): @@ -179,8 +178,7 @@ class Config: return result - # TODO Rename to "auth_sections" - def authenticator_sections(self) -> List[Tuple[str, SectionProxy]]: + def auth_sections(self) -> List[Tuple[str, SectionProxy]]: result = [] for name, proxy in self._parser.items(): if name.startswith("auth:"): diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 434407d..df48bd2 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -28,7 +28,7 @@ class Pferd: def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: log.explain_topic("Deciding which crawlers to run") - crawl_sections = [name for name, _ in config.crawler_sections()] + crawl_sections = [name for name, _ in config.crawl_sections()] if cli_crawlers is None: log.explain("No crawlers specified on CLI") @@ -63,7 +63,7 @@ class Pferd: return crawlers_to_run def _load_authenticators(self) -> None: - for name, section in self._config.authenticator_sections(): + for name, section in self._config.auth_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") auth_type = section.get("type") authenticator_constructor = AUTHENTICATORS.get(auth_type) @@ -77,7 +77,7 @@ class Pferd: # Cookie sharing kit_ilias_web_paths: Dict[Authenticator, List[Path]] = {} - for name, section in self._config.crawler_sections(): + for name, section in self._config.crawl_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") crawl_type = section.get("type") From aabce764ace344a93c0876b6a304921b0ca09db6 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 15:54:01 +0200 Subject: [PATCH 260/524] Clean up TODOs --- PFERD/crawl/ilias/kit_ilias_html.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py 
b/PFERD/crawl/ilias/kit_ilias_html.py index 032bb27..4bc3161 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -393,8 +393,7 @@ class IliasPage: ) if modification_date_match is None: modification_date = None - # TODO: Figure out if this is expected or *always* an error. - log.explain(f"Element {name} at {url} has no date. Properties: {all_properties_text!r}") + log.explain(f"Element {name} at {url} has no date.") else: modification_date_str = modification_date_match.group(1) modification_date = demangle_date(modification_date_str) @@ -420,9 +419,6 @@ class IliasPage: if "target=file_" in parsed_url.query: return IliasElementType.FILE - # TODO: Match based on CMD_CLASS or icon? The folder_like check looks at the icon, - # but we could also match the command class. I am not sure what's more stable. - # Everything with a ref_id can *probably* be opened to reveal nested things # video groups, directories, exercises, etc if "ref_id=" in parsed_url.query: From a848194601cb731b80c7cefb690b87864df8a243 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 17:15:13 +0200 Subject: [PATCH 261/524] Rename plaintext link option to "plaintext" --- PFERD/crawl/ilias/file_templates.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 6f2b1cd..151a41b 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -97,14 +97,14 @@ URL={{link}} class Links(Enum): IGNORE = "ignore" - PLAIN = "plain" + PLAINTEXT = "plaintext" FANCY = "fancy" INTERNET_SHORTCUT = "internet-shortcut" def template(self) -> Optional[str]: if self == self.FANCY: return _link_template_fancy - elif self == self.PLAIN: + elif self == self.PLAINTEXT: return _link_template_plain elif self == self.INTERNET_SHORTCUT: return _link_template_internet_shortcut @@ -115,7 +115,7 @@ class Links(Enum): def extension(self) -> 
Optional[str]: if self == self.FANCY: return ".html" - elif self == self.PLAIN: + elif self == self.PLAINTEXT: return ".txt" elif self == self.INTERNET_SHORTCUT: return ".url" @@ -128,5 +128,5 @@ class Links(Enum): try: return Links(string) except ValueError: - raise ValueError("must be one of 'ignore', 'plain'," + raise ValueError("must be one of 'ignore', 'plaintext'," " 'html', 'internet-shortcut'") From 519a7ef435b8771214e910c2436830dd98ed8022 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 16:57:14 +0200 Subject: [PATCH 262/524] Split --dump-config into two options --dump-config with its optional argument tended to consume the command name, so it had to be split up. --- PFERD/__main__.py | 16 ++++++++++------ PFERD/cli/parser.py | 9 ++++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index b7c5fa9..b42c526 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -46,7 +46,7 @@ def configure_logging_from_args(args: argparse.Namespace) -> None: # We want to prevent any unnecessary output if we're printing the config to # stdout, otherwise it would not be a valid config file. - if args.dump_config == "-": + if args.dump_config_to == "-": log.output_explain = False log.output_status = False log.output_report = False @@ -56,7 +56,7 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N # In configure_logging_from_args(), all normal logging is already disabled # whenever we dump the config. We don't want to override that decision with # values from the config file. 
- if args.dump_config == "-": + if args.dump_config_to == "-": return try: @@ -74,13 +74,17 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N def dump_config(args: argparse.Namespace, config: Config) -> None: log.explain_topic("Dumping config") + if args.dump_config and args.dump_config_to is not None: + log.error("--dump-config and --dump-config-to can't be specified at the same time") + exit(1) + try: - if args.dump_config is True: + if args.dump_config: config.dump() - elif args.dump_config == "-": + elif args.dump_config_to == "-": config.dump_to_stdout() else: - config.dump(Path(args.dump_config)) + config.dump(Path(args.dump_config_to)) except ConfigDumpError as e: log.error(str(e)) log.error_contd(e.reason) @@ -101,7 +105,7 @@ def main() -> None: # all places that were not already covered by CLI args. configure_logging_from_config(args, config) - if args.dump_config is not None: + if args.dump_config or args.dump_config_to is not None: dump_config(args, config) exit() diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index 4e3b425..e6b0671 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -155,11 +155,14 @@ PARSER.add_argument( ) PARSER.add_argument( "--dump-config", - nargs="?", - const=True, + action="store_true", + help="dump current configuration to the default config path and exit" +) +PARSER.add_argument( + "--dump-config-to", metavar="PATH", help="dump current configuration to a file and exit." 
- " Uses default config file path if no path is specified" + " Use '-' as path to print to stdout instead" ) PARSER.add_argument( "--crawler", "-C", From c665c36d88d0d48791e5818effed2c12a83b81e8 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 17:16:57 +0200 Subject: [PATCH 263/524] Update README, CHANGELOG --- CHANGELOG.md | 19 ++++++++- CONFIG.md | 2 +- README.md | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 134 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 519c046..3f032cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,13 +8,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added -- Support for concurrent downloads -- Support for proper config files +- Proper config files +- Concurrent crawling +- Crawl external ILIAS links +- Crawl uploaded exercise solutions +- Explain what PFERD is doing and why (`--explain`) +- More control over output (`--status`, `--report`) +- Print report after exiting via Ctrl+C +- Store crawler reports in `.report` JSON file +- Extensive config file documentation (`CONFIG.md`) +- Documentation for developers (`DEV.md`) - This changelog ### Changed - Rewrote almost everything +- Better error messages - Redesigned CLI +- Redesigned transform rules +- ILIAS crawling logic (paths may be different) +- Better support for weird paths on Windows +- Set user agent (`PFERD/`) ### Removed - Backwards compatibility with 2.x +- Python files as config files +- Some types of crawlers diff --git a/CONFIG.md b/CONFIG.md index 2f2dbbe..b48a2dd 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -90,7 +90,7 @@ full name of an auth section (including the `auth:` prefix). 
Here is a simple example: -``` +```ini [auth:example] type = simple username = foo diff --git a/README.md b/README.md index f9d718e..8e3b387 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,14 @@ Other resources: - [Changelog](CHANGELOG.md) - [Development Guide](DEV.md) -## Installation with pip +## Installation + +### Direct download + +Binaries for Linux, Windows and Mac can be downloaded directly from the +[latest release](https://github.com/Garmelon/PFERD/releases/latest). + +### With pip Ensure you have at least Python 3.8 installed. Run the following command to install PFERD or upgrade it to the latest version: @@ -18,3 +25,111 @@ $ pip install --upgrade git+https://github.com/Garmelon/PFERD@latest ``` The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. + +## Basic usage + +PFERD can be run directly from the command line with no config file. +Run `pferd -h` to get an overview of available commands and options. +Run `pferd -h` to see which options a command has. + +For example, you can download your personal desktop from the KIT ILIAS like +this: + +``` +$ pferd kit-ilias-web desktop +``` + +Also, you can download most ILIAS pages directly like this: + +``` +$ pferd kit-ilias-web +``` + +However, the CLI only lets you download a single thing at a time, and the +resulting command can grow long quite quickly. Because of this, PFERD can also +be used with a config file. + +To get started, just take a command you've been using and add `--dump-config` +directly after `pferd`, like this: + +``` +$ pferd --dump-config kit-ilias-web +``` + +This will make PFERD write its current configuration to its default config file +path. You can then run `pferd` without a command and it will execute the config +file. Alternatively, you can use `--dump-config-to` and specify a path yourself. 
+Using `--dump-config-to -` will print the configuration to stdout instead of a +file, which is a good way to see what is actually going on when using a CLI +command. + +Another good way to see what PFERD is doing is the `--explain` option. When +enabled, PFERD explains in detail what it is doing and why. This can help with +debugging your own config, for example. + +If you don't want to run all crawlers from your config file, you can specify the +crawlers you want to run with `--crawler` or `-C`, like this: + +``` +$ pferd -C crawler1 -C crawler2 +``` + +## Advanced usage + +PFERD supports lots of different options. For example, you can configure PFERD +to [use your system's keyring](CONFIG.md#the-keyring-authenticator) instead of +prompting you for your username and password. PFERD also supports +[transformation rules](CONFIG.md#transformation-rules) that let you rename or +exclude certain files. + +For more details, see the comprehensive [config format documentation](CONFIG.md). + +## Example + +This example downloads a few courses from the KIT ILIAS with a common keyring +authenticator. It reorganizes and ignores some files. + +```ini +[DEFAULT] +# All paths will be relative to this. +# The crawler output directories will be /Foo and /Bar. +working_dir = ~/stud +# If files vanish from ILIAS the local files are not deleted, allowing us to +# take a look at them before deleting them ourselves. +on_conflict = no-delete + +[auth:ilias] +type = keyring +username = foo + +[crawl:Foo] +type = kit-ilias-web +auth = auth:ilias +# Crawl a course by its ID (found as `ref_id=ID` in the URL) +target = 1234567 + +# Plaintext files are easier to read by other tools +links = plaintext + +transform = + # Ignore unneeded folders + Online-Tests --> ! + Vorlesungswerbung --> ! + + # Move exercises to own folder. Rename them to "Blatt-XX.pdf" to make them sort properly + "Übungsunterlagen/(\d+). Übungsblatt.pdf" -re-> Blätter/Blatt-{i1:02}.pdf + # Move solutions to own folder. 
Rename them to "Blatt-XX-Lösung.pdf" to make them sort properly + "Übungsunterlagen/(\d+). Übungsblatt.*Musterlösung.pdf" -re-> Blätter/Blatt-{i1:02}-Lösung.pdf + + # The course has nested folders with the same name - flatten them + "Übungsunterlagen/(.+?)/\\1/(.*)" -re-> Übung/{g1}/{g2} + + # Rename remaining folders + Übungsunterlagen --> Übung + Lehrbücher --> Vorlesung + +[crawl:Bar] +type = kit-ilias-web +auth = auth:ilias +target = 1337420 +``` From 6644126b5dd87bc587edd4ec77344800d945566a Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 17:29:39 +0200 Subject: [PATCH 264/524] Fix package discovery --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 1cbfc6a..3b6e43b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 @@ name = PFERD version = attr: PFERD.version.VERSION [options] -packages = PFERD +packages = find: python_requires = >=3.8 install_requires = aiohttp>=3.7.4.post0 From f85b75df8c3a945e1bf4a6a087705e51a54e5455 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 17:33:05 +0200 Subject: [PATCH 265/524] Switch from exit() to sys.exit() Pyinstaller doesn't recognize exit(). 
--- PFERD/__main__.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index b42c526..9399a10 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -1,6 +1,7 @@ import argparse import asyncio import configparser +import sys from pathlib import Path from .cli import PARSER, load_default_section @@ -33,7 +34,7 @@ def load_config(args: argparse.Namespace) -> Config: except ConfigLoadError as e: log.error(str(e)) log.error_contd(e.reason) - exit(1) + sys.exit(1) def configure_logging_from_args(args: argparse.Namespace) -> None: @@ -68,7 +69,7 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N log.output_report = config.default_section.report() except ConfigOptionError as e: log.error(str(e)) - exit(1) + sys.exit(1) def dump_config(args: argparse.Namespace, config: Config) -> None: @@ -76,7 +77,7 @@ def dump_config(args: argparse.Namespace, config: Config) -> None: if args.dump_config and args.dump_config_to is not None: log.error("--dump-config and --dump-config-to can't be specified at the same time") - exit(1) + sys.exit(1) try: if args.dump_config: @@ -88,7 +89,7 @@ def dump_config(args: argparse.Namespace, config: Config) -> None: except ConfigDumpError as e: log.error(str(e)) log.error_contd(e.reason) - exit(1) + sys.exit(1) def main() -> None: @@ -107,25 +108,25 @@ def main() -> None: if args.dump_config or args.dump_config_to is not None: dump_config(args, config) - exit() + sys.exit() try: pferd = Pferd(config, args.crawler) except PferdLoadError as e: log.unlock() log.error(str(e)) - exit(1) + sys.exit(1) try: asyncio.run(pferd.run()) except ConfigOptionError as e: log.unlock() log.error(str(e)) - exit(1) + sys.exit(1) except RuleParseError as e: log.unlock() e.pretty_print() - exit(1) + sys.exit(1) except KeyboardInterrupt: log.unlock() log.explain_topic("Interrupted, exiting immediately") @@ -135,9 +136,9 @@ def main() -> None: # TODO Clean up 
tmp files # And when those files *do* actually get cleaned up properly, # reconsider if this should really exit with 1 - exit(1) + sys.exit(1) except Exception: log.unlock() log.unexpected_exception() pferd.print_report() - exit(1) + sys.exit(1) From 07a75a37c31e80f8e6b90500cde97d356328d553 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 15:57:03 +0000 Subject: [PATCH 266/524] Fix FileNotFoundError on Windows --- PFERD/output_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index ea3a3e0..7883ee0 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -148,7 +148,7 @@ class OutputDirectory: # If you want longer paths, you will have to add the "\\?\" prefix # in front of your path. See: # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation - self._root = Path("\\\\?\\" + str(root)) + self._root = Path("\\\\?\\" + str(root.absolute())) else: self._root = root From 263780e6a3429458dff17945fbac91c2a482451e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 18:09:51 +0200 Subject: [PATCH 267/524] Use certifi to ensure CA certificates are bundled in pyinstaller --- PFERD/crawl/http_crawler.py | 3 +++ setup.cfg | 1 + 2 files changed, 4 insertions(+) diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index 177972b..9f52c66 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -1,8 +1,10 @@ import asyncio +import ssl from pathlib import Path, PurePath from typing import Dict, List, Optional import aiohttp +import certifi from aiohttp.client import ClientTimeout from ..auth import Authenticator @@ -155,6 +157,7 @@ class HttpCrawler(Crawler): async with aiohttp.ClientSession( headers={"User-Agent": f"{NAME}/{VERSION}"}, cookie_jar=self._cookie_jar, + connector=aiohttp.TCPConnector(ssl=ssl.create_default_context(cafile=certifi.where())), timeout=ClientTimeout( # 30 minutes. 
No download in the history of downloads was longer than 30 minutes. # This is enough to transfer a 600 MB file over a 3 Mib/s connection. diff --git a/setup.cfg b/setup.cfg index 3b6e43b..5758282 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,6 +10,7 @@ install_requires = beautifulsoup4>=4.9.3 rich>=10.1.0 keyring>=23.0.1 + certifi>=2020.12.5 [options.entry_points] console_scripts = From 30be4e29fad0a6ef1abeef99db2c3c944ec46cbd Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 25 May 2021 16:34:18 +0000 Subject: [PATCH 268/524] Add workaround for RuntimeError after program finishes on Windows --- PFERD/__main__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 9399a10..69c819b 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -118,7 +118,10 @@ def main() -> None: sys.exit(1) try: - asyncio.run(pferd.run()) + loop = asyncio.get_event_loop() + loop.run_until_complete(pferd.run()) + loop.run_until_complete(asyncio.sleep(1)) + loop.close() except ConfigOptionError as e: log.unlock() log.error(str(e)) From 66f0e398a10b4d0ec1595f20dcc169a295e56059 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 19:19:51 +0200 Subject: [PATCH 269/524] Await result in tfa authenticate path --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 6013d77..fa68ee7 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -539,7 +539,7 @@ class KitShibbolethLogin: "_eventId_proceed": "", "j_tokenNumber": tfa_token } - return _post(session, url, data) + return await _post(session, url, data) @staticmethod def _login_successful(soup: BeautifulSoup) -> bool: From 2d8dcc87ff19e913459845ccb8b173fc6da6abe2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 May 2021 19:23:06 +0200 Subject: [PATCH 
270/524] Send CSRF token in TFA request --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index fa68ee7..60be6d8 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -531,13 +531,15 @@ class KitShibbolethLogin: # credentials rather than after asking. form = soup.find("form", {"method": "post"}) action = form["action"] + csrf_token = form.find("input", {"name": "csrf_token"})["value"] # Equivalent: Enter token in # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO url = "https://idp.scc.kit.edu" + action data = { "_eventId_proceed": "", - "j_tokenNumber": tfa_token + "j_tokenNumber": tfa_token, + "csrf_token": csrf_token } return await _post(session, url, data) From 915e42fd07b28bed722258be1386de669d928f3b Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 May 2021 10:51:41 +0200 Subject: [PATCH 271/524] Fix report not being printed if pferd exits normally --- PFERD/__main__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 69c819b..55be1ea 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -145,3 +145,5 @@ def main() -> None: log.unexpected_exception() pferd.print_report() sys.exit(1) + else: + pferd.print_report() From a879c6ab6eea4adcbe7d41a6fafe3b61c65b1e88 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 May 2021 10:52:04 +0200 Subject: [PATCH 272/524] Fix function being printed --- PFERD/__main__.py | 2 +- PFERD/cli/command_kit_ilias_web.py | 3 +++ PFERD/cli/command_local.py | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 55be1ea..59004ae 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -19,7 +19,7 @@ def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser: log.explain("No CLI command specified, 
loading config from file") Config.load_parser(parser, path=args.config) else: - log.explain(f"CLI command specified, creating config for {args.command!r}") + log.explain("CLI command specified, loading config from its arguments") if args.command: args.command(args, parser) diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index ccb7134..c21b6a4 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -3,6 +3,7 @@ import configparser from pathlib import Path from ..crawl.ilias.file_templates import Links +from ..logging import log from .parser import CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, load_crawler, show_value_error SUBPARSER = SUBPARSERS.add_parser( @@ -66,6 +67,8 @@ def load( args: argparse.Namespace, parser: configparser.ConfigParser, ) -> None: + log.explain("Creating config for command 'kit-ilias-web'") + parser["crawl:ilias"] = {} section = parser["crawl:ilias"] load_crawler(args, section) diff --git a/PFERD/cli/command_local.py b/PFERD/cli/command_local.py index 73f9d43..309c42f 100644 --- a/PFERD/cli/command_local.py +++ b/PFERD/cli/command_local.py @@ -2,6 +2,7 @@ import argparse import configparser from pathlib import Path +from ..logging import log from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler SUBPARSER = SUBPARSERS.add_parser( @@ -49,6 +50,8 @@ def load( args: argparse.Namespace, parser: configparser.ConfigParser, ) -> None: + log.explain("Creating config for command 'local'") + parser["crawl:local"] = {} section = parser["crawl:local"] load_crawler(args, section) From adb5d4ade37deab0c42af67cd7d2fdbdc46bc483 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 May 2021 10:58:19 +0200 Subject: [PATCH 273/524] Print files that are *not* deleted by cleanup These are files that are not present on the remote source any more, but still present locally. They also show up in the report. 
--- PFERD/output_dir.py | 3 +++ PFERD/pferd.py | 3 +++ PFERD/report.py | 11 +++++++++++ 3 files changed, 17 insertions(+) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 7883ee0..304101a 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -493,6 +493,9 @@ class OutputDirectory: self._report.delete_file(pure) except OSError: pass + else: + log.status(f"[bold bright_magenta]Not deleted[/] {escape(fmt_path(pure))}") + self._report.not_delete_file(pure) def load_prev_report(self) -> None: log.explain_topic(f"Loading previous report from {fmt_real_path(self._report_path)}") diff --git a/PFERD/pferd.py b/PFERD/pferd.py index df48bd2..7f4d6ff 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -136,6 +136,9 @@ class Pferd: for path in sorted(crawler.report.deleted_files): something_changed = True log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}") + for path in sorted(crawler.report.not_deleted_files): + something_changed = True + log.report(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") if not something_changed: log.report(" Nothing changed") diff --git a/PFERD/report.py b/PFERD/report.py index 4f15237..b47490f 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -56,6 +56,7 @@ class Report: self.added_files: Set[PurePath] = set() self.changed_files: Set[PurePath] = set() self.deleted_files: Set[PurePath] = set() + self.not_deleted_files: Set[PurePath] = set() @staticmethod def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]: @@ -93,6 +94,8 @@ class Report: self.change_file(PurePath(elem)) for elem in self._get_list_of_strs(data, "deleted"): self.delete_file(PurePath(elem)) + for elem in self._get_list_of_strs(data, "not_deleted"): + self.not_delete_file(PurePath(elem)) return self @@ -107,6 +110,7 @@ class Report: "added": [str(path) for path in sorted(self.added_files)], "changed": [str(path) for path in sorted(self.changed_files)], "deleted": [str(path) for path in sorted(self.deleted_files)], + 
"not_deleted": [str(path) for path in sorted(self.not_deleted_files)], } with open(path, "w") as f: @@ -163,3 +167,10 @@ class Report: """ self.deleted_files.add(path) + + def not_delete_file(self, path: PurePath) -> None: + """ + Unlike mark(), this function accepts any paths. + """ + + self.not_deleted_files.add(path) From 533f75ea71735dab602340317d09f1c1a3f8d559 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 May 2021 11:37:32 +0200 Subject: [PATCH 274/524] Add --debug-transforms flag --- PFERD/__main__.py | 2 +- PFERD/cli/parser.py | 5 +++++ PFERD/crawl/crawler.py | 20 +++++++++++++++++++- PFERD/pferd.py | 15 ++++++++++++++- 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 59004ae..26a1dc4 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -119,7 +119,7 @@ def main() -> None: try: loop = asyncio.get_event_loop() - loop.run_until_complete(pferd.run()) + loop.run_until_complete(pferd.run(args.debug_transforms)) loop.run_until_complete(asyncio.sleep(1)) loop.close() except ConfigOptionError as e: diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index e6b0671..269a19a 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -164,6 +164,11 @@ PARSER.add_argument( help="dump current configuration to a file and exit." 
" Use '-' as path to print to stdout instead" ) +PARSER.add_argument( + "--debug-transforms", + action="store_true", + help="apply transform rules to files of previous run" +) PARSER.add_argument( "--crawler", "-C", action="append", diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index 321daa2..aa0f81c 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -3,7 +3,7 @@ import os from abc import ABC, abstractmethod from datetime import datetime from pathlib import Path, PurePath -from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Tuple, TypeVar +from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar from rich.markup import escape @@ -334,3 +334,21 @@ class Crawler(ABC): """ pass + + def debug_transforms(self) -> None: + self._output_dir.load_prev_report() + + if not self.prev_report: + log.warn("Couldn't find or load old report") + return + + seen: Set[PurePath] = set() + for known in self.prev_report.known_files: + looking_at = list(reversed(known.parents)) + [known] + for path in looking_at: + if path in seen: + continue + + log.explain_topic(f"Transforming {fmt_path(path)}") + self._transformer.transform(path) + seen.add(path) diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 7f4d6ff..ac373cf 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -92,7 +92,14 @@ class Pferd: if isinstance(crawler, KitIliasWebCrawler): crawler.share_cookies(kit_ilias_web_paths) - async def run(self) -> None: + def debug_transforms(self) -> None: + for name in self._crawlers_to_run: + crawler = self._crawlers[name] + log.print("") + log.print(f"[bold bright_cyan]Debugging transforms[/] for {escape(name)}") + crawler.debug_transforms() + + async def run(self, debug_transforms: bool) -> None: """ May throw ConfigOptionError. 
""" @@ -103,6 +110,12 @@ class Pferd: self._load_authenticators() self._load_crawlers() + if debug_transforms: + log.output_explain = True + log.output_report = False + self.debug_transforms() + return + log.print("") for name in self._crawlers_to_run: From 17207546e9cbd76dfb2dc2dd4cb18db72188239b Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 May 2021 11:47:51 +0200 Subject: [PATCH 275/524] Document --debug-transforms --- CHANGELOG.md | 1 + CONFIG.md | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f032cf..8460a94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Crawl uploaded exercise solutions - Explain what PFERD is doing and why (`--explain`) - More control over output (`--status`, `--report`) +- Debug transform rules with `--debug-transforms` - Print report after exiting via Ctrl+C - Store crawler reports in `.report` JSON file - Extensive config file documentation (`CONFIG.md`) diff --git a/CONFIG.md b/CONFIG.md index b48a2dd..06d7dab 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -205,6 +205,9 @@ They are specified line-by-line in a crawler's `transform` option. When a crawler needs to apply a rule to a path, it goes through this list top-to-bottom and choose the first matching rule. +To see this process in action, you can use the `--debug-transforms` or flag or +the `--explain` flag. + Each line has the format `SOURCE ARROW TARGET` where `TARGET` is optional. `SOURCE` is either a normal path without spaces (e. g. `foo/bar`), or a string literal delimited by `"` or `'` (e. g. `"foo\" bar/baz"`). 
Python's string From 2c72a9112cea5f812ee3e039297c01044a9cb534 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 27 May 2021 13:20:37 +0200 Subject: [PATCH 276/524] Reword `-name->` and `-name-re->` docs and remove `-name-exact->` --- CONFIG.md | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 06d7dab..fcc263a 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -213,7 +213,7 @@ Each line has the format `SOURCE ARROW TARGET` where `TARGET` is optional. literal delimited by `"` or `'` (e. g. `"foo\" bar/baz"`). Python's string escape syntax is supported. Trailing slashes are ignored. `TARGET` can be formatted like `SOURCE`, but it can also be a single exclamation mark without -quotes (`!`). `ARROW` is one of `-->`, `-exact->`, `-name->`, `-re->` and +quotes (`!`). `ARROW` is one of `-->`, `-name->`, `-exact->`, `-re->` and `-name-re->` If a rule's target is `!`, this means that when the rule matches on a path, the @@ -230,11 +230,15 @@ well as all its contents. ### The `-name->` arrow -The `-name->` arrow works similar to the `-->` arrow, but pretends it is in the -same directory as the file or directory it is applied to. For example, the rule -`bar -name-> baz` would convert `foo/bar` into `foo/baz` and `foo/bar/xyz` into -`foo/baz/xyz`. The rule `foo --> !` would ignore all files and directories named -`foo` as well as their contents. +The `-name->` arrow lets you rename files and directories by their name, +regardless of where they appear in the file tree. Because of this, its `SOURCE` +must not contain multiple path segments, only a single name. This restriction +does not apply to its `TARGET`. The `-name->` arrow is not applied recursively +to its own output to prevent infinite loops. + +For example, the rule `foo -name-> bar/baz` would convert `a/foo` into +`a/bar/baz` and `a/foo/b/c/foo` into `a/bar/baz/b/c/bar/baz`. 
The rule `foo +-name-> !` would ignore all directories and files named `foo`. ### The `-exact->` arrow @@ -244,14 +248,6 @@ but `foo/bar/xyz` would be unaffected. Also, `foo -exact-> !` would only ignore `foo`, but not its contents (if it has any). The examples below show why this is useful. -### The `-name-exact->` arrow - -The `-name-exact->` arrow works similar to the `-exact->` arrow, but pretends it -is in the same directory as the file or directory it is applied to. For example, -the rule `bar -name-exact-> baz` would convert `foo/bar` into `foo/baz` but -`foo/bar/xyz` would be unaffected. The rule `foo --> !` would ignore only ignore -files and directories named `foo`, but not their contents. - ### The `-re->` arrow The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression @@ -275,8 +271,14 @@ example `{g2.lower()}` or `{g3.replace(' ', '_')}`. ### The `-name-re->` arrow -The `-name-re>` arrow works similar to the `-re->` arrow, but pretends it is in -the same directory as the file or directory it is applied to. +The `-name-re->` arrow is like a combination of the `-name->` and `-re->` arrows. +Instead of the `SOURCE` being the name of a directory or file, it's a regex that +is matched against the names of directories and files. `TARGET` works like the +`-re->` arrow's target. + +For example, the arrow `(.*)\.jpeg -name-re-> {g1}.jpg` will rename all `.jpeg` +extensions into `.jpg`. The arrow `\..+ -name-re-> !` will ignore all files and +directories starting with `.`. 
### Example: Tutorials From 80acc4b50d8e41f75ef83ad79214cc57cdd00c61 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 27 May 2021 13:42:49 +0200 Subject: [PATCH 277/524] Implement new name arrows --- PFERD/transformer.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 23844f8..f147fb2 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -85,12 +85,29 @@ class NameRule(Rule): self._subrule = subrule def transform(self, path: PurePath) -> Union[PurePath, bool]: - name = PurePath(*path.parts[-1:]) - result = self._subrule.transform(name) - if isinstance(result, PurePath): - return path.parent / result - else: + matched = False + result = PurePath() + + for part in path.parts: + part_result = self._subrule.transform(PurePath(part)) + if isinstance(part_result, PurePath): + matched = True + result /= part_result + elif part_result: + # If any subrule call ignores its path segment, the entire path + # should be ignored + return True + else: + # The subrule doesn't modify this segment, but maybe other + # segments + result /= part + + if matched: return result + else: + # The subrule has modified no segments, so this name version of it + # doesn't match + return False class ReRule(Rule): @@ -278,6 +295,7 @@ def parse_rule(line: Line) -> Rule: if isinstance(left, bool): line.index = leftindex raise RuleParseError(line, "Left side can't be '!'") + leftpath = PurePath(left) # Parse arrow parse_whitespace(line) @@ -300,13 +318,14 @@ def parse_rule(line: Line) -> Rule: # Dispatch if arrowname == "": - return NormalRule(PurePath(left), rightpath) + return NormalRule(leftpath, rightpath) elif arrowname == "name": - return NameRule(NormalRule(PurePath(left), rightpath)) + if len(leftpath.parts) > 1: + line.index = leftindex + raise RuleParseError(line, "SOURCE must be a single name, not multiple segments") + return NameRule(ExactRule(leftpath, rightpath)) elif 
arrowname == "exact": - return ExactRule(PurePath(left), rightpath) - elif arrowname == "name-exact": - return NameRule(ExactRule(PurePath(left), rightpath)) + return ExactRule(leftpath, rightpath) elif arrowname == "re": return ReRule(left, right) elif arrowname == "name-re": From 6fa9cfd4c35566f530fc2a03a4713309f3475db1 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 27 May 2021 13:56:01 +0200 Subject: [PATCH 278/524] Fix error when capturing group is None --- CONFIG.md | 4 +++- PFERD/transformer.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index fcc263a..f31e7f6 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -258,7 +258,9 @@ are available in `TARGET` for formatting. be referred to as `{g}` (e. g. `{g3}`). `{g0}` refers to the original path. If capturing group *n*'s contents are a valid integer, the integer value is available as `{i}` (e. g. `{i3}`). If capturing group *n*'s contents are a -valid float, the float value is available as `{f}` (e. g. `{f3}`). +valid float, the float value is available as `{f}` (e. g. `{f3}`). If a +capturing group is not present (e. g. when matching the string `cd` with the +regex `(ab)?cd`), the corresponding variables are not defined. Python's format string syntax has rich options for formatting its arguments. For example, to left-pad the capturing group 3 with the digit `0` to width 5, you diff --git a/PFERD/transformer.py b/PFERD/transformer.py index f147fb2..83ffde4 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -7,7 +7,7 @@ import ast import re from abc import ABC, abstractmethod from pathlib import PurePath -from typing import Dict, Optional, Union +from typing import Dict, Optional, Sequence, Union from .logging import log from .utils import fmt_path @@ -122,8 +122,14 @@ class ReRule(Rule): vars: Dict[str, Union[str, int, float]] = {} - groups = [match[0]] + list(match.groups()) + # For some reason, mypy thinks that "groups" has type List[str]. 
+ # But since elements of "match.groups()" can be None, mypy is + # wrong. + groups: Sequence[Optional[str]] = [match[0]] + list(match.groups()) for i, group in enumerate(groups): + if group is None: + continue + vars[f"g{i}"] = group try: @@ -352,7 +358,13 @@ class Transformer: for i, (line, rule) in enumerate(self._rules): log.explain(f"Testing rule {i+1}: {line}") - result = rule.transform(path) + try: + result = rule.transform(path) + except Exception as e: + log.warn(f"Error while testing rule {i+1}: {line}") + log.warn_contd(str(e)) + continue + if isinstance(result, PurePath): log.explain(f"Match found, transformed path to {fmt_path(result)}") return result From 19eed5bdffa9e6f742c85459faf64b80173b4e07 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 27 May 2021 00:31:36 +0200 Subject: [PATCH 279/524] Fix authentication logic conflicts with videos --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 60be6d8..0b20d1c 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -360,7 +360,9 @@ class KitIliasWebCrawler(HttpCrawler): page = IliasPage(await self._get_page(element.url), element.url, element) real_element = page.get_child_elements()[0] - await self._stream_from_url(real_element.url, sink, bar) + log.explain(f"Streaming video from real url {real_element.url}") + + await self._stream_from_url(real_element.url, sink, bar, is_video=True) await impl() @@ -374,15 +376,19 @@ class KitIliasWebCrawler(HttpCrawler): async def impl() -> None: assert dl # The function is only reached when dl is not None async with dl as (bar, sink): - await self._stream_from_url(element.url, sink, bar) + await self._stream_from_url(element.url, sink, bar, is_video=False) await impl() - async def _stream_from_url(self, url: str, sink: FileSink, bar: 
ProgressBar) -> None: + async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar, is_video: bool) -> None: async def try_stream() -> bool: - async with self.session.get(url, allow_redirects=False) as resp: - # Redirect means we weren't authenticated - if hdrs.LOCATION in resp.headers: + async with self.session.get(url, allow_redirects=is_video) as resp: + if not is_video: + # Redirect means we weren't authenticated + if hdrs.LOCATION in resp.headers: + return False + # we wanted a video but got HTML + if is_video and "html" in resp.content_type: return False if resp.content_length: From 5beb4d9a2d98949b01afd021f8c4b2157bd67281 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 27 May 2021 00:55:46 +0200 Subject: [PATCH 280/524] Fix renaming conflict with multi-stage video elements --- PFERD/crawl/ilias/kit_ilias_html.py | 20 +++++++++++++++----- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 10 +++++++--- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 4bc3161..afb7005 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -62,15 +62,17 @@ class IliasPage: log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() + def get_next_stage_url(self) -> Optional[str]: + if self._is_ilias_opencast_embedding(): + return self.get_child_elements()[0].url + return None + def _is_video_player(self) -> bool: return "paella_config_file" in str(self._soup) def _is_video_listing(self) -> bool: - # ILIAS fluff around it - if self._soup.find(id="headerimage"): - element: Tag = self._soup.find(id="headerimage") - if "opencast" in element.attrs["src"].lower(): - return True + if self._is_ilias_opencast_embedding(): + return True # Raw listing without ILIAS fluff video_element_table: Tag = self._soup.find( @@ -78,6 +80,14 @@ class IliasPage: ) return video_element_table is not None + def 
_is_ilias_opencast_embedding(self) -> bool: + # ILIAS fluff around the real opencast html + if self._soup.find(id="headerimage"): + element: Tag = self._soup.find(id="headerimage") + if "opencast" in element.attrs["src"].lower(): + return True + return False + def _is_exercise_file(self) -> bool: # we know it from before if self._page_type == IliasElementType.EXERCISE: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 0b20d1c..12a6e79 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -242,10 +242,14 @@ class KitIliasWebCrawler(HttpCrawler): async def gather_elements() -> None: elements.clear() async with cl: - soup = await self._get_page(url) + next_stage_url: Optional[str] = url log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") - log.explain(f"URL: {url}") - page = IliasPage(soup, url, parent) + + while next_stage_url: + soup = await self._get_page(next_stage_url) + log.explain(f"URL: {url}") + page = IliasPage(soup, url, parent) + next_stage_url = page.get_next_stage_url() elements.extend(page.get_child_elements()) From 474aa7e1cc154b77899cef1c51023d29459f7ee5 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 27 May 2021 15:41:00 +0000 Subject: [PATCH 281/524] Use sorted path order when debugging transforms --- PFERD/crawl/crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index aa0f81c..87d362f 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -343,7 +343,7 @@ class Crawler(ABC): return seen: Set[PurePath] = set() - for known in self.prev_report.known_files: + for known in sorted(self.prev_report.known_files): looking_at = list(reversed(known.parents)) + [known] for path in looking_at: if path in seen: From 1ca6740e052166397b76b9eb9df3e7c33cf52efc Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 27 May 2021 17:59:22 +0200 Subject: [PATCH 
282/524] Improve log messages when parsing ILIAS HTML Previously some logs were split around an "await", which isn't a great idea. --- PFERD/crawl/ilias/kit_ilias_html.py | 6 +++--- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index afb7005..a2f30e1 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -142,7 +142,7 @@ class IliasPage: url: str = self._abs_url_from_link(content_link) query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} url = url_set_query_params(url, query_params) - log.explain("Found ILIAS redirection page, following it as a new entry") + log.explain("Found ILIAS video frame page, fetching actual content next") return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None @@ -205,7 +205,7 @@ class IliasPage: video_url = self._abs_url_from_link(link) - log.explain(f"Found video {video_name!r} at {video_url!r}") + log.explain(f"Found video {video_name!r} at {video_url}") return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) def _find_exercise_entries(self) -> List[IliasPageElement]: @@ -436,7 +436,7 @@ class IliasPage: _unexpected_html_warning() log.warn_contd( - f"Tried to figure out element type, but failed for {str(element_name)!r} / {link_element!r})" + f"Tried to figure out element type, but failed for {element_name!r} / {link_element!r})" ) return None diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 12a6e79..fbbfc1b 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -243,11 +243,11 @@ class KitIliasWebCrawler(HttpCrawler): elements.clear() async with cl: next_stage_url: Optional[str] = 
url - log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") while next_stage_url: soup = await self._get_page(next_stage_url) - log.explain(f"URL: {url}") + log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") + log.explain(f"URL: {next_stage_url}") page = IliasPage(soup, url, parent) next_stage_url = page.get_next_stage_url() From d65efed561f5131b95bed7ac61c5f0036c1609f3 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 28 May 2021 21:21:04 +0000 Subject: [PATCH 283/524] Slightly adjust phrasing --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e3b387..d25e86f 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ command. Another good way to see what PFERD is doing is the `--explain` option. When enabled, PFERD explains in detail what it is doing and why. This can help with -debugging your own config, for example. +debugging your own config. If you don't want to run all crawlers from your config file, you can specify the crawlers you want to run with `--crawler` or `-C`, like this: From b78eb64f3d00f17a60ff68b6e96ec7356ad9eddc Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 29 May 2021 21:38:36 +0200 Subject: [PATCH 284/524] Document versioning scheme --- CHANGELOG.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8460a94..de912c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,24 @@ # Changelog -All notable changes to this project will be documented in this file. +All notable changes to this project will be documented in this file. The format +is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +This project has its own custom versioning scheme. Version numbers consist of +three parts (e. g. `3.1.5`). 
+- The first number is increased on major rewrites or changes. What classifies as + a major change is up to the maintainers. This is pretty rare and a PFERD + version 4 should hopefully not be necessary. +- The second number is increased on backwards-incompatible changes in behaviour. + This refers to any change that would make an existing setup behave differently + (e. g. renaming options or changing crawler behaviour). If this number is + increased, it may be necessary for you to adapt your own setup. +- The third number is increased on backwards-compatible changes (e. g. adding + new options or commands, changing documentation, fixing bugs). Updates that + only increase this number should be safe and not require manual intervention. + +We will try to correctly classify changes as backwards-compatible or +backwards-incompatible, but may occasionally make mistakes or stumble across +ambiguous situations. ## Unreleased From 84f775013fcbf79e6a31d29f1fb8cbd5351dedfa Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 11:41:20 +0200 Subject: [PATCH 285/524] Use event loop workaround only on windows This avoids an unnecessary one-second sleep on other platforms. However, a better "fix" for this sleep would be a less ugly workaround on windows. --- PFERD/__main__.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 26a1dc4..9d61264 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -1,6 +1,7 @@ import argparse import asyncio import configparser +import os import sys from pathlib import Path @@ -118,10 +119,18 @@ def main() -> None: sys.exit(1) try: - loop = asyncio.get_event_loop() - loop.run_until_complete(pferd.run(args.debug_transforms)) - loop.run_until_complete(asyncio.sleep(1)) - loop.close() + if os.name == "nt": + # A "workaround" for the windows event loop somehow crashing after + # asyncio.run() completes. 
See: + # https://bugs.python.org/issue39232 + # https://github.com/encode/httpx/issues/914#issuecomment-780023632 + # TODO Fix this properly + loop = asyncio.get_event_loop() + loop.run_until_complete(pferd.run(args.debug_transforms)) + loop.run_until_complete(asyncio.sleep(1)) + loop.close() + else: + asyncio.run(pferd.run(args.debug_transforms)) except ConfigOptionError as e: log.unlock() log.error(str(e)) From 1dd24551a589ef4d2f2ae25c280ad57ab63aceae Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 11:44:17 +0200 Subject: [PATCH 286/524] Add link to repo in --version output --- PFERD/cli/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index 269a19a..754b8ad 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -145,7 +145,7 @@ PARSER.set_defaults(command=None) PARSER.add_argument( "--version", action="version", - version=f"{NAME} {VERSION}", + version=f"{NAME} {VERSION} (https://github.com/Garmelon/PFERD)", ) PARSER.add_argument( "--config", "-c", From 17879a7f69c2746b080e696949201e4092c46f4f Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 11:50:20 +0200 Subject: [PATCH 287/524] Print box around message for unexpected exceptions --- PFERD/logging.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/PFERD/logging.py b/PFERD/logging.py index 5025d88..1a07b3e 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -8,6 +8,7 @@ from typing import AsyncIterator, ContextManager, Iterator, List, Optional from rich.console import Console, RenderGroup from rich.live import Live from rich.markup import escape +from rich.panel import Panel from rich.progress import (BarColumn, DownloadColumn, Progress, TaskID, TextColumn, TimeRemainingColumn, TransferSpeedColumn) from rich.table import Column @@ -170,10 +171,13 @@ class Log: self.error_contd("") self.error_contd(traceback.format_exc()) - self.error_contd(""" + # Our print function doesn't take 
types other than strings, but the + # underlying rich.print function does. This call is a special case + # anyways, and we're calling it internally, so this should be fine. + self.print(Panel.fit(""" Please copy your program output and send it to the PFERD maintainers, either directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new - """.strip()) + """.strip())) # type: ignore def explain_topic(self, text: str) -> None: """ From 64a29607519fdaca1a17ee1bca3676fc83f33920 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 12:21:05 +0200 Subject: [PATCH 288/524] Align paths in status messages and progress bars Also print "Ignored" when paths are ignored due to transforms --- PFERD/crawl/crawler.py | 18 +++++++----------- PFERD/logging.py | 32 +++++++++++++++++++++++++++----- PFERD/output_dir.py | 12 +++++------- 3 files changed, 39 insertions(+), 23 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index 87d362f..ce69967 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -5,8 +5,6 @@ from datetime import datetime from pathlib import Path, PurePath from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar -from rich.markup import escape - from ..auth import Authenticator from ..config import Config, Section from ..deduplicator import Deduplicator @@ -104,12 +102,9 @@ class CrawlToken(ReusableAsyncContextManager[ProgressBar]): return self._path async def _on_aenter(self) -> ProgressBar: - bar_desc = f"[bold bright_cyan]Crawling[/] {escape(fmt_path(self._path))}" - after_desc = f"[bold cyan]Crawled[/] {escape(fmt_path(self._path))}" - - self._stack.callback(lambda: log.status(after_desc)) + self._stack.callback(lambda: log.status("[bold cyan]", "Crawled", fmt_path(self._path))) await self._stack.enter_async_context(self._limiter.limit_crawl()) - bar = self._stack.enter_context(log.crawl_bar(bar_desc)) + bar = self._stack.enter_context(log.crawl_bar("[bold 
bright_cyan]", "Crawling", fmt_path(self._path))) return bar @@ -127,12 +122,11 @@ class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]): return self._path async def _on_aenter(self) -> Tuple[ProgressBar, FileSink]: - bar_desc = f"[bold bright_cyan]Downloading[/] {escape(fmt_path(self._path))}" - # The "Downloaded ..." message is printed in the output dir, not here - await self._stack.enter_async_context(self._limiter.limit_download()) sink = await self._stack.enter_async_context(self._fs_token) - bar = self._stack.enter_context(log.download_bar(bar_desc)) + # The "Downloaded ..." message is printed in the output dir, not here + bar = self._stack.enter_context(log.download_bar("[bold bright_cyan]", "Downloading", + fmt_path(self._path))) return bar, sink @@ -273,6 +267,7 @@ class Crawler(ABC): if self._transformer.transform(path) is None: log.explain("Answer: No") + log.status("[bold bright_black]", "Ignored", fmt_path(path)) return None log.explain("Answer: Yes") @@ -291,6 +286,7 @@ class Crawler(ABC): transformed_path = self._transformer.transform(path) if transformed_path is None: log.explain("Answer: No") + log.status("[bold bright_black]", "Ignored", fmt_path(path)) return None fs_token = await self._output_dir.download(path, transformed_path, mtime, redownload, on_conflict) diff --git a/PFERD/logging.py b/PFERD/logging.py index 1a07b3e..32e5268 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -28,6 +28,8 @@ class ProgressBar: class Log: + STATUS_WIDTH = 11 + def __init__(self) -> None: self.console = Console(highlight=False) @@ -195,13 +197,15 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_explain: self.print(f" {escape(text)}") - def status(self, text: str) -> None: + def status(self, style: str, action: str, text: str) -> None: """ - Print a status update while crawling. Allows markup. + Print a status update while crawling. 
Allows markup in the "style" + argument which will be applied to the "action" string. """ if self.output_status: - self.print(text) + action = escape(f"{action:<{self.STATUS_WIDTH}}") + self.print(f"{style}{action}[/] {escape(text)}") def report(self, text: str) -> None: """ @@ -233,16 +237,34 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new def crawl_bar( self, - description: str, + style: str, + action: str, + text: str, total: Optional[float] = None, ) -> ContextManager[ProgressBar]: + """ + Allows markup in the "style" argument which will be applied to the + "action" string. + """ + + action = escape(f"{action:<{self.STATUS_WIDTH}}") + description = f"{style}{action}[/] {text}" return self._bar(self._crawl_progress, description, total) def download_bar( self, - description: str, + style: str, + action: str, + text: str, total: Optional[float] = None, ) -> ContextManager[ProgressBar]: + """ + Allows markup in the "style" argument which will be applied to the + "action" string. 
+ """ + + action = escape(f"{action:<{self.STATUS_WIDTH}}") + description = f"{style}{action}[/] {text}" return self._bar(self._download_progress, description, total) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 304101a..0fb9911 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -11,8 +11,6 @@ from enum import Enum from pathlib import Path, PurePath from typing import BinaryIO, Iterator, Optional, Tuple -from rich.markup import escape - from .logging import log from .report import Report, ReportLoadError from .utils import ReusableAsyncContextManager, fmt_path, fmt_real_path, prompt_yes_no @@ -425,7 +423,7 @@ class OutputDirectory: async def _after_download(self, info: DownloadInfo) -> None: with self._ensure_deleted(info.tmp_path): - log.status(f"[bold cyan]Downloaded[/] {fmt_path(info.remote_path)}") + log.status("[bold cyan]", "Downloaded", fmt_path(info.remote_path)) log.explain_topic(f"Processing downloaded file for {fmt_path(info.path)}") changed = False @@ -456,10 +454,10 @@ class OutputDirectory: self._update_metadata(info) if changed: - log.status(f"[bold bright_yellow]Changed[/] {escape(fmt_path(info.path))}") + log.status("[bold bright_yellow]", "Changed", fmt_path(info.path)) self._report.change_file(info.path) else: - log.status(f"[bold bright_green]Added[/] {escape(fmt_path(info.path))}") + log.status("[bold bright_green]", "Added", fmt_path(info.path)) self._report.add_file(info.path) async def cleanup(self) -> None: @@ -489,12 +487,12 @@ class OutputDirectory: if await self._conflict_delete_lf(self._on_conflict, pure): try: path.unlink() - log.status(f"[bold bright_magenta]Deleted[/] {escape(fmt_path(pure))}") + log.status("[bold bright_magenta]", "Deleted", fmt_path(pure)) self._report.delete_file(pure) except OSError: pass else: - log.status(f"[bold bright_magenta]Not deleted[/] {escape(fmt_path(pure))}") + log.status("[bold bright_magenta]", "Not deleted", fmt_path(pure)) self._report.not_delete_file(pure) def 
load_prev_report(self) -> None: From 7b062883f619238b9992834c39484e9973a172f9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 12:28:11 +0200 Subject: [PATCH 289/524] Use raw paths for --debug-transforms Previously, the already-transformed paths were used, which meant that --debug-transforms was cumbersome to use (as you had to remove all transforms and crawl once before getting useful results). --- PFERD/crawl/crawler.py | 4 +++- PFERD/report.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index ce69967..e990f16 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -264,6 +264,7 @@ class Crawler(ABC): async def crawl(self, path: PurePath) -> Optional[CrawlToken]: log.explain_topic(f"Decision: Crawl {fmt_path(path)}") path = self._deduplicator.mark(path) + self._output_dir.report.found(path) if self._transformer.transform(path) is None: log.explain("Answer: No") @@ -282,6 +283,7 @@ class Crawler(ABC): ) -> Optional[DownloadToken]: log.explain_topic(f"Decision: Download {fmt_path(path)}") path = self._deduplicator.mark(path) + self._output_dir.report.found(path) transformed_path = self._transformer.transform(path) if transformed_path is None: @@ -339,7 +341,7 @@ class Crawler(ABC): return seen: Set[PurePath] = set() - for known in sorted(self.prev_report.known_files): + for known in sorted(self.prev_report.found_paths): looking_at = list(reversed(known.parents)) + [known] for path in looking_at: if path in seen: diff --git a/PFERD/report.py b/PFERD/report.py index b47490f..919bb35 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -50,12 +50,22 @@ class Report: """ def __init__(self) -> None: + # Paths found by the crawler, untransformed + self.found_paths: Set[PurePath] = set() + + # Files reserved for metadata files (e. g. the report file or cookies) + # that can't be overwritten by user transforms and won't be cleaned up + # at the end. 
self.reserved_files: Set[PurePath] = set() + + # Files found by the crawler, transformed. Only includes files that + # were downloaded (or a download was attempted) self.known_files: Set[PurePath] = set() self.added_files: Set[PurePath] = set() self.changed_files: Set[PurePath] = set() self.deleted_files: Set[PurePath] = set() + # Files that should have been deleted by the cleanup but weren't self.not_deleted_files: Set[PurePath] = set() @staticmethod @@ -84,6 +94,8 @@ class Report: raise ReportLoadError("Incorrect format: Root is not an object") self = cls() + for elem in self._get_list_of_strs(data, "found"): + self.found(PurePath(elem)) for elem in self._get_list_of_strs(data, "reserved"): self.mark_reserved(PurePath(elem)) for elem in self._get_list_of_strs(data, "known"): @@ -105,6 +117,7 @@ class Report: """ data = { + "found": [str(path) for path in sorted(self.found_paths)], "reserved": [str(path) for path in sorted(self.reserved_files)], "known": [str(path) for path in sorted(self.known_files)], "added": [str(path) for path in sorted(self.added_files)], @@ -117,6 +130,9 @@ class Report: json.dump(data, f, indent=2, sort_keys=True) f.write("\n") # json.dump doesn't do this + def found(self, path: PurePath) -> None: + self.found_paths.add(path) + def mark_reserved(self, path: PurePath) -> None: if path in self.marked: raise RuntimeError("Trying to reserve an already reserved file") From 921cec7ddcd183414fa4f4d12cb5ae2dcd14150e Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 12:49:04 +0200 Subject: [PATCH 290/524] Bump version to 3.0.0 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index de912c9..5f9ca72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. 
## Unreleased +## 3.0.0 - 2021-05-31 + ### Added - Proper config files - Concurrent crawling From 1fba96abcb09c35aa47d3a94c0c758b2457efaa7 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 31 May 2021 18:00:42 +0200 Subject: [PATCH 291/524] Fix exercise date parsing for non-group submissions ILIAS apparently changes the order of the fields as it sees fit, so we now try to parse *every* column, starting from the right, as a date. The first column that parses successfully is then used. --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 17 +++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f9ca72..34c997a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Fixed +- Date parsing now also works correctly in non-group exercises + ## 3.0.0 - 2021-05-31 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index a2f30e1..64491f9 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -230,12 +230,16 @@ class IliasPage: parent_row: Tag = link.findParent("tr") children: List[Tag] = parent_row.findChildren("td") - # - # 0 1 2 3 4 name = _sanitize_path_name(children[1].getText().strip()) - date = demangle_date(children[3].getText().strip()) - log.explain(f"Found exercise detail entry {name!r}") + + for child in reversed(children): + date = demangle_date(child.getText().strip(), fail_silently=True) + if date is not None: + break + if date is None: + log.warn(f"Date parsing failed for exercise entry {name!r}") + results.append(IliasPageElement( IliasElementType.FILE, self._abs_url_from_link(link), @@ -522,7 +526,7 @@ german_months = ['Jan', 'Feb', 'Mär', 'Apr', 'Mai', 'Jun', 'Jul', 'Aug', 'Sep', english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] -def demangle_date(date_str: str) -> Optional[datetime]: +def 
demangle_date(date_str: str, fail_silently: bool = False) -> Optional[datetime]: """ Demangle a given date in one of the following formats: "Gestern, HH:MM" @@ -554,7 +558,8 @@ def demangle_date(date_str: str) -> Optional[datetime]: return datetime(year, month, day, hour, minute) except Exception: - log.warn(f"Date parsing failed for {date_str!r}") + if not fail_silently: + log.warn(f"Date parsing failed for {date_str!r}") return None From 9d5ec84b91fcc3ce546710eaf2f40c37679ea8a1 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 17:55:56 +0200 Subject: [PATCH 292/524] Add credential file authenticator --- CHANGELOG.md | 3 +++ CONFIG.md | 13 +++++++++++ PFERD/auth/__init__.py | 5 +++- PFERD/auth/credential_file.py | 43 +++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 PFERD/auth/credential_file.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 34c997a..1dffa1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Added +- `credential-file` authenticator + ### Fixed - Date parsing now also works correctly in non-group exercises diff --git a/CONFIG.md b/CONFIG.md index f31e7f6..7826b04 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -180,6 +180,19 @@ via the terminal. - `username`: The username. (Optional) - `password`: The password. (Optional) +### The `credential-file` authenticator + +This authenticator reads a username and a password from a credential file. The +credential file has exactly two lines (trailing newline optional). The first +line starts with `username=` and contains the username, the second line starts +with `password=` and contains the password. The username and password may +contain any characters except a line break. + +``` +username=AzureDiamond +password=hunter2 +``` + ### The `keyring` authenticator This authenticator uses the system keyring to store passwords. 
The username can diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 6e7fd3a..39f7f5c 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -3,6 +3,7 @@ from typing import Callable, Dict from ..config import Config from .authenticator import Authenticator, AuthError, AuthSection # noqa: F401 +from .credential_file import CredentialFileAuthenticator, CredentialFileAuthSection from .keyring import KeyringAuthenticator, KeyringAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator @@ -14,10 +15,12 @@ AuthConstructor = Callable[[ ], Authenticator] AUTHENTICATORS: Dict[str, AuthConstructor] = { + "credential-file": lambda n, s, c: + CredentialFileAuthenticator(n, CredentialFileAuthSection(s)), "simple": lambda n, s, c: SimpleAuthenticator(n, SimpleAuthSection(s)), "tfa": lambda n, s, c: TfaAuthenticator(n), "keyring": lambda n, s, c: - KeyringAuthenticator(n, KeyringAuthSection(s)) + KeyringAuthenticator(n, KeyringAuthSection(s)), } diff --git a/PFERD/auth/credential_file.py b/PFERD/auth/credential_file.py new file mode 100644 index 0000000..540b65b --- /dev/null +++ b/PFERD/auth/credential_file.py @@ -0,0 +1,43 @@ +from pathlib import Path +from typing import Tuple + +from ..utils import fmt_real_path +from .authenticator import Authenticator, AuthLoadError, AuthSection + + +class CredentialFileAuthSection(AuthSection): + def path(self) -> Path: + value = self.s.get("path") + if value is None: + self.missing_value("path") + return Path(value) + + +class CredentialFileAuthenticator(Authenticator): + def __init__(self, name: str, section: CredentialFileAuthSection) -> None: + super().__init__(name) + + path = section.path() + try: + with open(path) as f: + lines = list(f) + except OSError as e: + raise AuthLoadError(f"No credential file at {fmt_real_path(path)}") from e + + if len(lines) != 2: + raise AuthLoadError("Credential file must be two lines long") + [uline, pline] = lines + uline = 
uline[:-1] # Remove trailing newline + if pline.endswith("\n"): + pline = pline[:-1] + + if not uline.startswith("username="): + raise AuthLoadError("First line must start with 'username='") + if not pline.startswith("password="): + raise AuthLoadError("Second line must start with 'password='") + + self._username = uline[:9] + self._password = pline[:9] + + async def credentials(self) -> Tuple[str, str]: + return self._username, self._password From 1ce32d2f18881d3484889c6a20758dbf0a8d59d9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 18:19:05 +0200 Subject: [PATCH 293/524] Add CLI option for credential file auth to kit-ilias-web --- CHANGELOG.md | 1 + PFERD/__main__.py | 5 ++++- PFERD/cli/__init__.py | 5 +++-- PFERD/cli/command_kit_ilias_web.py | 23 +++++++++++++++++++---- PFERD/cli/parser.py | 4 ++++ 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dffa1e..451853b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. 
### Added - `credential-file` authenticator +- `--credential-file` option for `kit-ilias-web` command ### Fixed - Date parsing now also works correctly in non-group exercises diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 9d61264..1cca8b1 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -5,7 +5,7 @@ import os import sys from pathlib import Path -from .cli import PARSER, load_default_section +from .cli import PARSER, ParserLoadError, load_default_section from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError from .logging import log from .pferd import Pferd, PferdLoadError @@ -36,6 +36,9 @@ def load_config(args: argparse.Namespace) -> Config: log.error(str(e)) log.error_contd(e.reason) sys.exit(1) + except ParserLoadError as e: + log.error(str(e)) + sys.exit(1) def configure_logging_from_args(args: argparse.Namespace) -> None: diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py index f9cb5d2..d70ecd9 100644 --- a/PFERD/cli/__init__.py +++ b/PFERD/cli/__init__.py @@ -1,11 +1,12 @@ # isort: skip_file # The order of imports matters because each command module registers itself -# with the parser from ".parser". Because of this, isort is disabled for this +# with the parser from ".parser" and the import order affects the order in +# which they appear in the help. Because of this, isort is disabled for this # file. Also, since we're reexporting or just using the side effect of # importing itself, we get a few linting warnings, which we're disabling as # well. from . import command_local # noqa: F401 imported but unused from . 
import command_kit_ilias_web # noqa: F401 imported but unused -from .parser import PARSER, load_default_section # noqa: F401 imported but unused +from .parser import PARSER, ParserLoadError, load_default_section # noqa: F401 imported but unused diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index c21b6a4..12803a6 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -4,7 +4,8 @@ from pathlib import Path from ..crawl.ilias.file_templates import Links from ..logging import log -from .parser import CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, load_crawler, show_value_error +from .parser import (CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, ParserLoadError, load_crawler, + show_value_error) SUBPARSER = SUBPARSERS.add_parser( "kit-ilias-web", @@ -38,6 +39,12 @@ GROUP.add_argument( action=BooleanOptionalAction, help="use the system keyring to store and retrieve passwords" ) +GROUP.add_argument( + "--credential-file", + type=Path, + metavar="PATH", + help="read username and password from a credential file" +) GROUP.add_argument( "--links", type=show_value_error(Links.from_string), @@ -88,11 +95,19 @@ def load( parser["auth:ilias"] = {} auth_section = parser["auth:ilias"] - auth_section["type"] = "simple" + if args.credential_file is not None: + if args.username is not None: + raise ParserLoadError("--credential-file and --username can't be used together") + if args.keyring: + raise ParserLoadError("--credential-file and --keyring can't be used together") + auth_section["type"] = "credential-file" + auth_section["path"] = str(args.credential_file) + elif args.keyring: + auth_section["type"] = "keyring" + else: + auth_section["type"] = "simple" if args.username is not None: auth_section["username"] = args.username - if args.keyring: - auth_section["type"] = "keyring" SUBPARSER.set_defaults(command=load) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index 754b8ad..f5fb215 100644 
--- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -8,6 +8,10 @@ from ..output_dir import OnConflict, Redownload from ..version import NAME, VERSION +class ParserLoadError(Exception): + pass + + # TODO Replace with argparse version when updating to 3.9? class BooleanOptionalAction(argparse.Action): def __init__( From 49ad1b6e463ecd135b13ae6a548eb87e95ea6c55 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 18:21:18 +0200 Subject: [PATCH 294/524] Clean up authenticator code formatting --- PFERD/auth/__init__.py | 4 ++-- PFERD/auth/authenticator.py | 5 +---- PFERD/auth/keyring.py | 6 +----- PFERD/auth/simple.py | 6 +----- PFERD/auth/tfa.py | 5 +---- 5 files changed, 6 insertions(+), 20 deletions(-) diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 39f7f5c..eff8370 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -17,10 +17,10 @@ AuthConstructor = Callable[[ AUTHENTICATORS: Dict[str, AuthConstructor] = { "credential-file": lambda n, s, c: CredentialFileAuthenticator(n, CredentialFileAuthSection(s)), + "keyring": lambda n, s, c: + KeyringAuthenticator(n, KeyringAuthSection(s)), "simple": lambda n, s, c: SimpleAuthenticator(n, SimpleAuthSection(s)), "tfa": lambda n, s, c: TfaAuthenticator(n), - "keyring": lambda n, s, c: - KeyringAuthenticator(n, KeyringAuthSection(s)), } diff --git a/PFERD/auth/authenticator.py b/PFERD/auth/authenticator.py index fe14909..f588bc4 100644 --- a/PFERD/auth/authenticator.py +++ b/PFERD/auth/authenticator.py @@ -17,10 +17,7 @@ class AuthSection(Section): class Authenticator(ABC): - def __init__( - self, - name: str - ) -> None: + def __init__(self, name: str) -> None: """ Initialize an authenticator from its name and its section in the config file. 
diff --git a/PFERD/auth/keyring.py b/PFERD/auth/keyring.py index c7ca2c2..c14f6fb 100644 --- a/PFERD/auth/keyring.py +++ b/PFERD/auth/keyring.py @@ -18,11 +18,7 @@ class KeyringAuthSection(AuthSection): class KeyringAuthenticator(Authenticator): - def __init__( - self, - name: str, - section: KeyringAuthSection, - ) -> None: + def __init__(self, name: str, section: KeyringAuthSection) -> None: super().__init__(name) self._username = section.username() diff --git a/PFERD/auth/simple.py b/PFERD/auth/simple.py index d2f4123..831c12f 100644 --- a/PFERD/auth/simple.py +++ b/PFERD/auth/simple.py @@ -14,11 +14,7 @@ class SimpleAuthSection(AuthSection): class SimpleAuthenticator(Authenticator): - def __init__( - self, - name: str, - section: SimpleAuthSection, - ) -> None: + def __init__(self, name: str, section: SimpleAuthSection) -> None: super().__init__(name) self._username = section.username() diff --git a/PFERD/auth/tfa.py b/PFERD/auth/tfa.py index 28ba150..26b1383 100644 --- a/PFERD/auth/tfa.py +++ b/PFERD/auth/tfa.py @@ -6,10 +6,7 @@ from .authenticator import Authenticator, AuthError class TfaAuthenticator(Authenticator): - def __init__( - self, - name: str, - ) -> None: + def __init__(self, name: str) -> None: super().__init__(name) async def username(self) -> str: From f40820c41fd110465ad5cb2172e3f547a2b6fea6 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 21:07:13 +0200 Subject: [PATCH 295/524] Warn if using concurrent tasks with kit-ilias-web --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 451853b..51e9a5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. 
### Added - `credential-file` authenticator - `--credential-file` option for `kit-ilias-web` command +- Warning if using concurrent tasks with `kit-ilias-web` ### Fixed - Date parsing now also works correctly in non-group exercises diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index fbbfc1b..78428e0 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -21,7 +21,6 @@ TargetType = Union[str, int] class KitIliasWebCrawlerSection(HttpCrawlerSection): - def target(self) -> TargetType: target = self.s.get("target") if not target: @@ -164,6 +163,12 @@ class KitIliasWebCrawler(HttpCrawler): auth = section.auth(authenticators) super().__init__(name, section, config, shared_auth=auth) + if section.tasks() > 1: + log.warn(""" +Please avoid using too many parallel requests as these are the KIT ILIAS +instance's greatest bottleneck. + """.strip()) + self._shibboleth_login = KitShibbolethLogin( auth, section.tfa_auth(authenticators), From 722970a2556e0c24584bc46fd088b24eea8fc406 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 May 2021 20:04:56 +0000 Subject: [PATCH 296/524] Store cookies in text-based format Using the stdlib's http.cookie module, cookies are now stored as one "Set-Cookie" header per line. Previously, the aiohttp.CookieJar's save() and load() methods were used (which use pickling). --- CHANGELOG.md | 3 +++ PFERD/crawl/http_crawler.py | 26 +++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51e9a5b..f7e33ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,9 @@ ambiguous situations. 
- `--credential-file` option for `kit-ilias-web` command - Warning if using concurrent tasks with `kit-ilias-web` +### Changed +- Cookies are now stored in a text-based format + ### Fixed - Date parsing now also works correctly in non-group exercises diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index 9f52c66..fa4cf29 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -1,7 +1,8 @@ import asyncio +import http.cookies import ssl from pathlib import Path, PurePath -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional import aiohttp import certifi @@ -105,6 +106,25 @@ class HttpCrawler(Crawler): self._shared_cookie_jar_paths.append(self._cookie_jar_path) + def _load_cookies_from_file(self, path: Path) -> None: + jar: Any = http.cookies.SimpleCookie() + with open(path) as f: + for i, line in enumerate(f): + # Names of headers are case insensitive + if line[:11].lower() == "set-cookie:": + jar.load(line[11:]) + else: + log.explain(f"Line {i} doesn't start with 'Set-Cookie:', ignoring it") + self._cookie_jar.update_cookies(jar) + + def _save_cookies_to_file(self, path: Path) -> None: + jar: Any = http.cookies.SimpleCookie() + for morsel in self._cookie_jar: + jar[morsel.key] = morsel + with open(path, "w") as f: + f.write(jar.output(sep="\n")) + f.write("\n") # A trailing newline is just common courtesy + def _load_cookies(self) -> None: log.explain_topic("Loading cookies") @@ -134,7 +154,7 @@ class HttpCrawler(Crawler): log.explain(f"Loading cookies from {fmt_real_path(cookie_jar_path)}") try: - self._cookie_jar.load(cookie_jar_path) + self._load_cookies_from_file(cookie_jar_path) except Exception as e: log.explain("Failed to load cookies") log.explain(str(e)) @@ -144,7 +164,7 @@ class HttpCrawler(Crawler): try: log.explain(f"Saving cookies to {fmt_real_path(self._cookie_jar_path)}") - self._cookie_jar.save(self._cookie_jar_path) + self._save_cookies_to_file(self._cookie_jar_path) except 
Exception as e: log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}") log.warn(str(e)) From f6b26f4eade09f65986e87019b392f3f2c019b88 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 1 Jun 2021 09:10:58 +0000 Subject: [PATCH 297/524] Fix unexpected exception when credential file not found --- PFERD/__main__.py | 3 ++- PFERD/auth/__init__.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 1cca8b1..5ae62bb 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -5,6 +5,7 @@ import os import sys from pathlib import Path +from .auth import AuthLoadError from .cli import PARSER, ParserLoadError, load_default_section from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError from .logging import log @@ -134,7 +135,7 @@ def main() -> None: loop.close() else: asyncio.run(pferd.run(args.debug_transforms)) - except ConfigOptionError as e: + except (ConfigOptionError, AuthLoadError) as e: log.unlock() log.error(str(e)) sys.exit(1) diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index eff8370..06b3ba4 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -2,7 +2,7 @@ from configparser import SectionProxy from typing import Callable, Dict from ..config import Config -from .authenticator import Authenticator, AuthError, AuthSection # noqa: F401 +from .authenticator import Authenticator, AuthError, AuthLoadError, AuthSection # noqa: F401 from .credential_file import CredentialFileAuthenticator, CredentialFileAuthSection from .keyring import KeyringAuthenticator, KeyringAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection From e1bda94329f5f0ff6c4a94a86f25d13efdb0d66d Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 1 Jun 2021 09:18:08 +0000 Subject: [PATCH 298/524] Load credential file from correct path --- PFERD/auth/__init__.py | 2 +- PFERD/auth/credential_file.py | 5 +++-- PFERD/config.py | 1 + 3 files changed, 5 
insertions(+), 3 deletions(-) diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 06b3ba4..277cade 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -16,7 +16,7 @@ AuthConstructor = Callable[[ AUTHENTICATORS: Dict[str, AuthConstructor] = { "credential-file": lambda n, s, c: - CredentialFileAuthenticator(n, CredentialFileAuthSection(s)), + CredentialFileAuthenticator(n, CredentialFileAuthSection(s), c), "keyring": lambda n, s, c: KeyringAuthenticator(n, KeyringAuthSection(s)), "simple": lambda n, s, c: diff --git a/PFERD/auth/credential_file.py b/PFERD/auth/credential_file.py index 540b65b..30a56ba 100644 --- a/PFERD/auth/credential_file.py +++ b/PFERD/auth/credential_file.py @@ -1,6 +1,7 @@ from pathlib import Path from typing import Tuple +from ..config import Config from ..utils import fmt_real_path from .authenticator import Authenticator, AuthLoadError, AuthSection @@ -14,10 +15,10 @@ class CredentialFileAuthSection(AuthSection): class CredentialFileAuthenticator(Authenticator): - def __init__(self, name: str, section: CredentialFileAuthSection) -> None: + def __init__(self, name: str, section: CredentialFileAuthSection, config: Config) -> None: super().__init__(name) - path = section.path() + path = config.default_section.working_dir() / section.path() try: with open(path) as f: lines = list(f) diff --git a/PFERD/config.py b/PFERD/config.py index 1462d82..0ea7abc 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -69,6 +69,7 @@ class Section: class DefaultSection(Section): def working_dir(self) -> Path: + # TODO Change to working dir instead of manually prepending it to paths pathstr = self.s.get("working_dir", ".") return Path(pathstr).expanduser() From f656e3ff34b05b486a4f5ad63ec6174e4080cb0d Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 1 Jun 2021 09:18:17 +0000 Subject: [PATCH 299/524] Fix credential parsing --- PFERD/auth/credential_file.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/PFERD/auth/credential_file.py b/PFERD/auth/credential_file.py index 30a56ba..d0fcdda 100644 --- a/PFERD/auth/credential_file.py +++ b/PFERD/auth/credential_file.py @@ -37,8 +37,8 @@ class CredentialFileAuthenticator(Authenticator): if not pline.startswith("password="): raise AuthLoadError("Second line must start with 'password='") - self._username = uline[:9] - self._password = pline[:9] + self._username = uline[9:] + self._password = pline[9:] async def credentials(self) -> Tuple[str, str]: return self._username, self._password From 85b9f45085b409357f3c509da7f2719f63e5d2f6 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 1 Jun 2021 09:49:30 +0000 Subject: [PATCH 300/524] Bump version to 3.0.1 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7e33ae..87c1d05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.0.1 - 2021-06-01 + ### Added - `credential-file` authenticator - `--credential-file` option for `kit-ilias-web` command diff --git a/PFERD/version.py b/PFERD/version.py index e26dabb..2aae99d 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.0.0" +VERSION = "3.0.1" From 1fc8e9eb7ad99ad8c950c76398aab64b05c7d801 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 1 Jun 2021 10:00:59 +0000 Subject: [PATCH 301/524] Document credential file authenticator config options --- CONFIG.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 7826b04..feeade3 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -182,8 +182,11 @@ via the terminal. ### The `credential-file` authenticator -This authenticator reads a username and a password from a credential file. The -credential file has exactly two lines (trailing newline optional). The first +This authenticator reads a username and a password from a credential file. 
+ +- `path`: Path to the credential file. (Required) + +The credential file has exactly two lines (trailing newline optional). The first line starts with `username=` and contains the username, the second line starts with `password=` and contains the password. The username and password may contain any characters except a line break. From 31b6311e993439b2bbb087511ca012e140003d9e Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 1 Jun 2021 19:02:55 +0200 Subject: [PATCH 302/524] Remove incorrect tmp file explain message --- PFERD/__main__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 5ae62bb..b274b6b 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -147,7 +147,6 @@ def main() -> None: log.unlock() log.explain_topic("Interrupted, exiting immediately") log.explain("Open files and connections are left for the OS to clean up") - log.explain("Temporary files are not cleaned up") pferd.print_report() # TODO Clean up tmp files # And when those files *do* actually get cleaned up properly, From fc31100a0f6e1933cf084e46898ad20d33d892b9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 4 Jun 2021 18:02:45 +0200 Subject: [PATCH 303/524] Always use '/' as path separator for regex rules Previously, regex-matching paths on windows would, in some cases, require four backslashes ('\\\\') to escape a single path separator. That's just too much. With this commit, regex transforms now use '/' instead of '\' as path separator, meaning rules can more easily be shared between platforms (although they are not guaranteed to be 100% compatible since on Windows, '\' is still recognized as a path separator). To make rules more intuitive to write, local relative paths are now also printed with '/' as path separator on Windows. Since Windows also accepts '/' as path separator, this change doesn't really affect other rules that parse their sides as paths. 
--- CHANGELOG.md | 3 +++ PFERD/transformer.py | 4 ++-- PFERD/utils.py | 8 +++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87c1d05..980f96e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Changed +- Use `/` instead of `\` as path separator for (regex) rules on Windows + ## 3.0.1 - 2021-06-01 ### Added diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 83ffde4..ed123eb 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -10,7 +10,7 @@ from pathlib import PurePath from typing import Dict, Optional, Sequence, Union from .logging import log -from .utils import fmt_path +from .utils import fmt_path, str_path class Rule(ABC): @@ -116,7 +116,7 @@ class ReRule(Rule): self._right = right def transform(self, path: PurePath) -> Union[PurePath, bool]: - if match := re.fullmatch(self._left, str(path)): + if match := re.fullmatch(self._left, str_path(path)): if isinstance(self._right, bool): return self._right or path diff --git a/PFERD/utils.py b/PFERD/utils.py index 397feda..7c7b6f4 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -91,8 +91,14 @@ def url_set_query_params(url: str, params: Dict[str, str]) -> str: return result +def str_path(path: PurePath) -> str: + if not path.parts: + return "." 
+ return "/".join(path.parts) + + def fmt_path(path: PurePath) -> str: - return repr(str(path)) + return repr(str_path(path)) def fmt_real_path(path: Path) -> str: From df3ad3d890e0c7e21fbb68305f3c1016f58c2523 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 4 Jun 2021 18:33:02 +0200 Subject: [PATCH 304/524] Add 'skip' option to crawlers --- CHANGELOG.md | 3 +++ CONFIG.md | 3 +++ PFERD/auth/authenticator.py | 6 +++++- PFERD/crawl/__init__.py | 2 +- PFERD/crawl/crawler.py | 9 +++++++++ PFERD/pferd.py | 39 +++++++++++++++++++++++++------------ 6 files changed, 48 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 980f96e..32cbe77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Added +- `skip` option for crawlers + ### Changed - Use `/` instead of `\` as path separator for (regex) rules on Windows diff --git a/CONFIG.md b/CONFIG.md index feeade3..2f18be1 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -49,6 +49,9 @@ see the type's [documentation](#crawler-types) below. The following options are common to all crawlers: - `type`: The available types are specified in [this section](#crawler-types). +- `skip`: Whether the crawler should be skipped during normal execution. The + crawler can still be executed manually using the `--crawler` or `-C` flags. + (Default: `no`) - `output_dir`: The directory the crawler synchronizes files to. A crawler will never place any files outside of this directory. (Default: the crawler's name) - `redownload`: When to download a file that is already present locally. 
diff --git a/PFERD/auth/authenticator.py b/PFERD/auth/authenticator.py index f588bc4..643a2d5 100644 --- a/PFERD/auth/authenticator.py +++ b/PFERD/auth/authenticator.py @@ -13,7 +13,11 @@ class AuthError(Exception): class AuthSection(Section): - pass + def type(self) -> str: + value = self.s.get("type") + if value is None: + self.missing_value("type") + return value class Authenticator(ABC): diff --git a/PFERD/crawl/__init__.py b/PFERD/crawl/__init__.py index 297c490..7eb2fb1 100644 --- a/PFERD/crawl/__init__.py +++ b/PFERD/crawl/__init__.py @@ -3,7 +3,7 @@ from typing import Callable, Dict from ..auth import Authenticator from ..config import Config -from .crawler import Crawler, CrawlError # noqa: F401 +from .crawler import Crawler, CrawlError, CrawlerSection # noqa: F401 from .ilias import KitIliasWebCrawler, KitIliasWebCrawlerSection from .local_crawler import LocalCrawler, LocalCrawlerSection diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index e990f16..d61783f 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -132,6 +132,15 @@ class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, FileSink]]): class CrawlerSection(Section): + def type(self) -> str: + value = self.s.get("type") + if value is None: + self.missing_value("type") + return value + + def skip(self) -> bool: + return self.s.getboolean("skip", fallback=False) + def output_dir(self, name: str) -> Path: # TODO Use removeprefix() after switching to 3.9 if name.startswith("crawl:"): diff --git a/PFERD/pferd.py b/PFERD/pferd.py index ac373cf..d98b426 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -3,9 +3,9 @@ from typing import Dict, List, Optional from rich.markup import escape -from .auth import AUTHENTICATORS, Authenticator, AuthError +from .auth import AUTHENTICATORS, Authenticator, AuthError, AuthSection from .config import Config, ConfigOptionError -from .crawl import CRAWLERS, Crawler, CrawlError, KitIliasWebCrawler +from .crawl import CRAWLERS, 
Crawler, CrawlError, CrawlerSection, KitIliasWebCrawler from .logging import log from .utils import fmt_path @@ -26,19 +26,22 @@ class Pferd: self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} - def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: - log.explain_topic("Deciding which crawlers to run") - crawl_sections = [name for name, _ in config.crawl_sections()] + def _find_config_crawlers(self, config: Config) -> List[str]: + crawl_sections = [] - if cli_crawlers is None: - log.explain("No crawlers specified on CLI") - log.explain("Running all crawlers specified in config") - return crawl_sections + for name, section in config.crawl_sections(): + if CrawlerSection(section).skip(): + log.explain(f"Skipping {name!r}") + else: + crawl_sections.append(name) + return crawl_sections + + def _find_cli_crawlers(self, config: Config, cli_crawlers: List[str]) -> List[str]: if len(cli_crawlers) != len(set(cli_crawlers)): raise PferdLoadError("Some crawlers were selected multiple times") - log.explain("Crawlers specified on CLI") + crawl_sections = [name for name, _ in config.crawl_sections()] crawlers_to_run = [] # With crawl: prefix unknown_names = [] # Without crawl: prefix @@ -62,10 +65,22 @@ class Pferd: return crawlers_to_run + def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: + log.explain_topic("Deciding which crawlers to run") + + if cli_crawlers is None: + log.explain("No crawlers specified on CLI") + log.explain("Running crawlers specified in config") + return self._find_config_crawlers(config) + else: + log.explain("Crawlers specified on CLI") + return self._find_cli_crawlers(config, cli_crawlers) + def _load_authenticators(self) -> None: for name, section in self._config.auth_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") - auth_type = section.get("type") + + auth_type = AuthSection(section).type() 
authenticator_constructor = AUTHENTICATORS.get(auth_type) if authenticator_constructor is None: raise ConfigOptionError(name, "type", f"Unknown authenticator type: {auth_type!r}") @@ -80,7 +95,7 @@ class Pferd: for name, section in self._config.crawl_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") - crawl_type = section.get("type") + crawl_type = CrawlerSection(section).type() crawler_constructor = CRAWLERS.get(crawl_type) if crawler_constructor is None: raise ConfigOptionError(name, "type", f"Unknown crawler type: {crawl_type!r}") From 8ab462fb87e8bdfac8bfd6821645dd9f4617e898 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 4 Jun 2021 19:23:33 +0200 Subject: [PATCH 305/524] Use the exercise label instead of the button name as path --- CHANGELOG.md | 2 ++ PFERD/crawl/ilias/kit_ilias_html.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32cbe77..171a61c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ ambiguous situations. 
### Changed - Use `/` instead of `\` as path separator for (regex) rules on Windows +- Use the label to the left for exercises instead of the button name to + determine the folder name ## 3.0.1 - 2021-06-01 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 64491f9..db9a303 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -293,7 +293,13 @@ class IliasPage: # Add each listing as a new for listing in file_listings: - file_name = _sanitize_path_name(listing.getText().strip()) + parent_container: Tag = listing.findParent( + "div", attrs={"class": lambda x: x and "form-group" in x} + ) + label_container: Tag = parent_container.find( + attrs={"class": lambda x: x and "control-label" in x} + ) + file_name = _sanitize_path_name(label_container.getText().strip()) url = self._abs_url_from_link(listing) log.explain(f"Found exercise detail {file_name!r} at {url}") results.append(IliasPageElement( From 61d902d7153f2942e24f92bd9e0a35e39be05563 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 9 Jun 2021 17:42:38 +0200 Subject: [PATCH 306/524] Overhaul transform logic -re-> arrows now rename their parent directories (like -->) and don't require a full match (like -exact->). Their old behaviour is available as -exact-re->. Also, this change adds the ">>" arrow head, which modifies the current path and continues to the next rule when it matches. --- CHANGELOG.md | 3 + PFERD/transformer.py | 540 +++++++++++++++++++++++-------------------- 2 files changed, 298 insertions(+), 245 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 171a61c..ffc6e81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,8 +24,11 @@ ambiguous situations. 
### Added - `skip` option for crawlers +- Rules with `>>` instead of `>` as arrow head +- `-exact-re->` arrow (behaves like `-re->` did previously) ### Changed +- The `-re->` arrow can now rename directories (like `-->`) - Use `/` instead of `\` as path separator for (regex) rules on Windows - Use the label to the left for exercises instead of the button name to determine the folder name diff --git a/PFERD/transformer.py b/PFERD/transformer.py index ed123eb..bf51d6a 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -1,151 +1,159 @@ -# I'm sorry that this code has become a bit dense and unreadable. While -# reading, it is important to remember what True and False mean. I'd love to -# have some proper sum-types for the inputs and outputs, they'd make this code -# a lot easier to understand. - import ast import re from abc import ABC, abstractmethod +from dataclasses import dataclass +from enum import Enum from pathlib import PurePath -from typing import Dict, Optional, Sequence, Union +from typing import Callable, Dict, List, Optional, Sequence, TypeVar, Union from .logging import log from .utils import fmt_path, str_path -class Rule(ABC): - @abstractmethod - def transform(self, path: PurePath) -> Union[PurePath, bool]: - """ - Try to apply this rule to the path. Returns another path if the rule - was successfully applied, True if the rule matched but resulted in an - exclamation mark, and False if the rule didn't match at all. 
- """ +class ArrowHead(Enum): + NORMAL = 0 + SEQUENCE = 1 + +class Ignore: + pass + + +class Empty: + pass + + +RightSide = Union[str, Ignore, Empty] + + +@dataclass +class Transformed: + path: PurePath + + +class Ignored: + pass + + +TransformResult = Optional[Union[Transformed, Ignored]] + + +@dataclass +class Rule: + left: str + name: str + head: ArrowHead + right: RightSide + + def right_result(self, path: PurePath) -> Union[str, Transformed, Ignored]: + if isinstance(self.right, str): + return self.right + elif isinstance(self.right, Ignore): + return Ignored() + elif isinstance(self.right, Empty): + return Transformed(path) + else: + raise RuntimeError(f"Right side has invalid type {type(self.right)}") + + +class Transformation(ABC): + def __init__(self, rule: Rule): + self.rule = rule + + @abstractmethod + def transform(self, path: PurePath) -> TransformResult: pass -# These rules all use a Union[T, bool] for their right side. They are passed a -# T if the arrow's right side was a normal string, True if it was an -# exclamation mark and False if it was missing entirely. 
- -class NormalRule(Rule): - def __init__(self, left: PurePath, right: Union[PurePath, bool]): - - self._left = left - self._right = right - - def _match_prefix(self, path: PurePath) -> Optional[PurePath]: - left_parts = list(reversed(self._left.parts)) - path_parts = list(reversed(path.parts)) - - if len(left_parts) > len(path_parts): +class ExactTf(Transformation): + def transform(self, path: PurePath) -> TransformResult: + if path != PurePath(self.rule.left): return None - while left_parts and path_parts: - left_part = left_parts.pop() - path_part = path_parts.pop() + right = self.rule.right_result(path) + if not isinstance(right, str): + return right - if left_part != path_part: - return None + return Transformed(PurePath(right)) - if left_parts: + +class ExactReTf(Transformation): + def transform(self, path: PurePath) -> TransformResult: + match = re.fullmatch(self.rule.left, str_path(path)) + if not match: return None - path_parts.reverse() - return PurePath(*path_parts) + right = self.rule.right_result(path) + if not isinstance(right, str): + return right - def transform(self, path: PurePath) -> Union[PurePath, bool]: - if rest := self._match_prefix(path): - if isinstance(self._right, bool): - return self._right or path + # For some reason, mypy thinks that "groups" has type List[str]. But + # since elements of "match.groups()" can be None, mypy is wrong. 
+ groups: Sequence[Optional[str]] = [match[0]] + list(match.groups()) + + locals_dir: Dict[str, Union[str, int, float]] = {} + for i, group in enumerate(groups): + if group is None: + continue + + locals_dir[f"g{i}"] = group + + try: + locals_dir[f"i{i}"] = int(group) + except ValueError: + pass + + try: + locals_dir[f"f{i}"] = float(group) + except ValueError: + pass + + result = eval(f"f{right!r}", {}, locals_dir) + return Transformed(PurePath(result)) + + +class RenamingParentsTf(Transformation): + def __init__(self, sub_tf: Transformation): + super().__init__(sub_tf.rule) + self.sub_tf = sub_tf + + def transform(self, path: PurePath) -> TransformResult: + for i in range(len(path.parts), -1, -1): + parent = PurePath(*path.parts[:i]) + child = PurePath(*path.parts[i:]) + + transformed = self.sub_tf.transform(parent) + if not transformed: + continue + elif isinstance(transformed, Transformed): + return Transformed(transformed.path / child) + elif isinstance(transformed, Ignored): + return transformed else: - return self._right / rest + raise RuntimeError(f"Invalid transform result of type {type(transformed)}: {transformed}") - return False + return None -class ExactRule(Rule): - def __init__(self, left: PurePath, right: Union[PurePath, bool]): - self._left = left - self._right = right +class RenamingPartsTf(Transformation): + def __init__(self, sub_tf: Transformation): + super().__init__(sub_tf.rule) + self.sub_tf = sub_tf - def transform(self, path: PurePath) -> Union[PurePath, bool]: - if path == self._left: - if isinstance(self._right, bool): - return self._right or path - else: - return self._right - - return False - - -class NameRule(Rule): - def __init__(self, subrule: Rule): - self._subrule = subrule - - def transform(self, path: PurePath) -> Union[PurePath, bool]: - matched = False + def transform(self, path: PurePath) -> TransformResult: result = PurePath() - for part in path.parts: - part_result = self._subrule.transform(PurePath(part)) - if 
isinstance(part_result, PurePath): - matched = True - result /= part_result - elif part_result: - # If any subrule call ignores its path segment, the entire path - # should be ignored - return True - else: - # The subrule doesn't modify this segment, but maybe other - # segments + transformed = self.sub_tf.transform(PurePath(part)) + if not transformed: result /= part + elif isinstance(transformed, Transformed): + result /= transformed.path + elif isinstance(transformed, Ignored): + return transformed + else: + raise RuntimeError(f"Invalid transform result of type {type(transformed)}: {transformed}") - if matched: - return result - else: - # The subrule has modified no segments, so this name version of it - # doesn't match - return False - - -class ReRule(Rule): - def __init__(self, left: str, right: Union[str, bool]): - self._left = left - self._right = right - - def transform(self, path: PurePath) -> Union[PurePath, bool]: - if match := re.fullmatch(self._left, str_path(path)): - if isinstance(self._right, bool): - return self._right or path - - vars: Dict[str, Union[str, int, float]] = {} - - # For some reason, mypy thinks that "groups" has type List[str]. - # But since elements of "match.groups()" can be None, mypy is - # wrong. 
- groups: Sequence[Optional[str]] = [match[0]] + list(match.groups()) - for i, group in enumerate(groups): - if group is None: - continue - - vars[f"g{i}"] = group - - try: - vars[f"i{i}"] = int(group) - except ValueError: - pass - - try: - vars[f"f{i}"] = float(group) - except ValueError: - pass - - result = eval(f"f{self._right!r}", vars) - return PurePath(result) - - return False + return None class RuleParseError(Exception): @@ -162,18 +170,15 @@ class RuleParseError(Exception): log.error_contd(f"{spaces}^--- {self.reason}") +T = TypeVar("T") + + class Line: def __init__(self, line: str, line_nr: int): self._line = line self._line_nr = line_nr self._index = 0 - def get(self) -> Optional[str]: - if self._index < len(self._line): - return self._line[self._index] - - return None - @property def line(self) -> str: return self._line @@ -190,155 +195,192 @@ class Line: def index(self, index: int) -> None: self._index = index - def advance(self) -> None: - self._index += 1 + @property + def rest(self) -> str: + return self.line[self.index:] - def expect(self, string: str) -> None: - for char in string: - if self.get() == char: - self.advance() - else: - raise RuleParseError(self, f"Expected {char!r}") + def peek(self, amount: int = 1) -> str: + return self.rest[:amount] + + def take(self, amount: int = 1) -> str: + string = self.peek(amount) + self.index += len(string) + return string + + def expect(self, string: str) -> str: + if self.peek(len(string)) == string: + return self.take(len(string)) + else: + raise RuleParseError(self, f"Expected {string!r}") + + def expect_with(self, string: str, value: T) -> T: + self.expect(string) + return value + + def one_of(self, parsers: List[Callable[[], T]], description: str) -> T: + for parser in parsers: + index = self.index + try: + return parser() + except RuleParseError: + self.index = index + + raise RuleParseError(self, description) + + +# RULE = LEFT SPACE '-' NAME '-' HEAD (SPACE RIGHT)? 
+# SPACE = ' '+ +# NAME = '' | 'exact' | 'name' | 're' | 'exact-re' | 'name-re' +# HEAD = '>' | '>>' +# LEFT = STR | QUOTED_STR +# RIGHT = STR | QUOTED_STR | '!' + + +def parse_zero_or_more_spaces(line: Line) -> None: + while line.peek() == " ": + line.take() + + +def parse_one_or_more_spaces(line: Line) -> None: + line.expect(" ") + parse_zero_or_more_spaces(line) + + +def parse_str(line: Line) -> str: + result = [] + while c := line.peek(): + if c == " ": + break + else: + line.take() + result.append(c) + + if result: + return "".join(result) + else: + raise RuleParseError(line, "Expected non-space character") QUOTATION_MARKS = {'"', "'"} -def parse_string_literal(line: Line) -> str: +def parse_quoted_str(line: Line) -> str: escaped = False # Points to first character of string literal start_index = line.index - quotation_mark = line.get() + quotation_mark = line.peek() if quotation_mark not in QUOTATION_MARKS: - # This should never happen as long as this function is only called from - # parse_string. 
- raise RuleParseError(line, "Invalid quotation mark") - line.advance() + raise RuleParseError(line, "Expected quotation mark") + line.take() - while c := line.get(): + while c := line.peek(): if escaped: escaped = False - line.advance() + line.take() elif c == quotation_mark: - line.advance() + line.take() stop_index = line.index literal = line.line[start_index:stop_index] - return ast.literal_eval(literal) + try: + return ast.literal_eval(literal) + except SyntaxError as e: + line.index = start_index + raise RuleParseError(line, str(e)) from e elif c == "\\": escaped = True - line.advance() + line.take() else: - line.advance() + line.take() raise RuleParseError(line, "Expected end of string literal") -def parse_until_space_or_eol(line: Line) -> str: - result = [] - while c := line.get(): - if c == " ": - break - result.append(c) - line.advance() - - return "".join(result) - - -def parse_string(line: Line) -> Union[str, bool]: - if line.get() in QUOTATION_MARKS: - return parse_string_literal(line) +def parse_left(line: Line) -> str: + if line.peek() in QUOTATION_MARKS: + return parse_quoted_str(line) else: - string = parse_until_space_or_eol(line) + return parse_str(line) + + +def parse_right(line: Line) -> Union[str, Ignore]: + c = line.peek() + if c in QUOTATION_MARKS: + return parse_quoted_str(line) + else: + string = parse_str(line) if string == "!": - return True + return Ignore() return string -def parse_arrow(line: Line) -> str: - line.expect("-") - - name = [] - while True: - c = line.get() - if not c: - raise RuleParseError(line, "Expected rest of arrow") - elif c == "-": - line.advance() - c = line.get() - if not c: - raise RuleParseError(line, "Expected rest of arrow") - elif c == ">": - line.advance() - break # End of arrow - else: - name.append("-") - continue - else: - name.append(c) - - line.advance() - - return "".join(name) +def parse_arrow_name(line: Line) -> str: + return line.one_of([ + lambda: line.expect("exact-re"), + lambda: 
line.expect("exact"), + lambda: line.expect("name-re"), + lambda: line.expect("name"), + lambda: line.expect("re"), + lambda: line.expect(""), + ], "Expected arrow name") -def parse_whitespace(line: Line) -> None: - line.expect(" ") - while line.get() == " ": - line.advance() +def parse_arrow_head(line: Line) -> ArrowHead: + return line.one_of([ + lambda: line.expect_with(">>", ArrowHead.SEQUENCE), + lambda: line.expect_with(">", ArrowHead.NORMAL), + ], "Expected arrow head") def parse_eol(line: Line) -> None: - if line.get() is not None: + if line.peek(): raise RuleParseError(line, "Expected end of line") def parse_rule(line: Line) -> Rule: - # Parse left side - leftindex = line.index - left = parse_string(line) - if isinstance(left, bool): - line.index = leftindex - raise RuleParseError(line, "Left side can't be '!'") - leftpath = PurePath(left) + parse_zero_or_more_spaces(line) + left = parse_left(line) - # Parse arrow - parse_whitespace(line) - arrowindex = line.index - arrowname = parse_arrow(line) + parse_one_or_more_spaces(line) - # Parse right side - if line.get(): - parse_whitespace(line) - right = parse_string(line) + line.expect("-") + name = parse_arrow_name(line) + line.expect("-") + head = parse_arrow_head(line) + + index = line.index + right: RightSide + try: + parse_zero_or_more_spaces(line) + parse_eol(line) + right = Empty() + except RuleParseError: + line.index = index + parse_one_or_more_spaces(line) + right = parse_right(line) + parse_eol(line) + + return Rule(left, name, head, right) + + +def parse_transformation(line: Line) -> Transformation: + rule = parse_rule(line) + + if rule.name == "": + return RenamingParentsTf(ExactTf(rule)) + elif rule.name == "exact": + return ExactTf(rule) + elif rule.name == "name": + return RenamingPartsTf(ExactTf(rule)) + elif rule.name == "re": + return RenamingParentsTf(ExactReTf(rule)) + elif rule.name == "exact-re": + return ExactReTf(rule) + elif rule.name == "name-re": + return 
RenamingPartsTf(ExactReTf(rule)) else: - right = False - rightpath: Union[PurePath, bool] - if isinstance(right, bool): - rightpath = right - else: - rightpath = PurePath(right) - - parse_eol(line) - - # Dispatch - if arrowname == "": - return NormalRule(leftpath, rightpath) - elif arrowname == "name": - if len(leftpath.parts) > 1: - line.index = leftindex - raise RuleParseError(line, "SOURCE must be a single name, not multiple segments") - return NameRule(ExactRule(leftpath, rightpath)) - elif arrowname == "exact": - return ExactRule(leftpath, rightpath) - elif arrowname == "re": - return ReRule(left, right) - elif arrowname == "name-re": - return NameRule(ReRule(left, right)) - else: - line.index = arrowindex + 1 # For nicer error message - raise RuleParseError(line, f"Invalid arrow name {arrowname!r}") + raise RuntimeError(f"Invalid arrow name {rule.name!r}") class Transformer: @@ -347,32 +389,40 @@ class Transformer: May throw a RuleParseException. """ - self._rules = [] + self._tfs = [] for i, line in enumerate(rules.split("\n")): line = line.strip() if line: - rule = parse_rule(Line(line, i)) - self._rules.append((line, rule)) + tf = parse_transformation(Line(line, i)) + self._tfs.append((line, tf)) def transform(self, path: PurePath) -> Optional[PurePath]: - for i, (line, rule) in enumerate(self._rules): + for i, (line, tf) in enumerate(self._tfs): log.explain(f"Testing rule {i+1}: {line}") try: - result = rule.transform(path) + result = tf.transform(path) except Exception as e: log.warn(f"Error while testing rule {i+1}: {line}") log.warn_contd(str(e)) continue - if isinstance(result, PurePath): - log.explain(f"Match found, transformed path to {fmt_path(result)}") - return result - elif result: # Exclamation mark - log.explain("Match found, path ignored") - return None - else: + if not result: continue - log.explain("No rule matched, path is unchanged") + if isinstance(result, Ignored): + log.explain("Match found, path ignored") + return None + + if 
tf.rule.head == ArrowHead.NORMAL: + log.explain(f"Match found, transformed path to {fmt_path(result.path)}") + path = result.path + break + elif tf.rule.head == ArrowHead.SEQUENCE: + log.explain(f"Match found, updated path to {fmt_path(result.path)}") + path = result.path + else: + raise RuntimeError(f"Invalid transform result of type {type(result)}: {result}") + + log.explain(f"Final result: {fmt_path(path)}") return path From f28bbe6b0c11c165ad604b6ab33730a37800604a Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 9 Jun 2021 22:22:40 +0200 Subject: [PATCH 307/524] Update transform rule documentation It's still missing an example that uses rules with ">>" arrows. --- CONFIG.md | 128 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 40 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 2f18be1..1793ddc 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -222,56 +222,87 @@ This authenticator does not support usernames. Transformation rules are rules for renaming and excluding files and directories. They are specified line-by-line in a crawler's `transform` option. When a crawler needs to apply a rule to a path, it goes through this list top-to-bottom -and choose the first matching rule. +and applies the first matching rule. To see this process in action, you can use the `--debug-transforms` or flag or the `--explain` flag. -Each line has the format `SOURCE ARROW TARGET` where `TARGET` is optional. -`SOURCE` is either a normal path without spaces (e. g. `foo/bar`), or a string -literal delimited by `"` or `'` (e. g. `"foo\" bar/baz"`). Python's string -escape syntax is supported. Trailing slashes are ignored. `TARGET` can be -formatted like `SOURCE`, but it can also be a single exclamation mark without -quotes (`!`). `ARROW` is one of `-->`, `-name->`, `-exact->`, `-re->` and -`-name-re->` +Each rule has the format `SOURCE ARROW TARGET` (e. g. `foo/bar --> foo/baz`). +The arrow specifies how the source and target are interpreted. 
The different +kinds of arrows are documented below. -If a rule's target is `!`, this means that when the rule matches on a path, the -corresponding file or directory is ignored. If a rule's target is missing, the -path is matched but not modified. +`SOURCE` and `TARGET` are either a bunch of characters without spaces (e. g. +`foo/bar`) or string literals (e. g, `"foo/b a r"`). The former syntax has no +concept of escaping characters, so the backslash is just another character. The +string literals however support Python's escape syntax (e. g. +`"foo\\bar\tbaz"`). This also means that in string literals, backslashes must be +escaped. + +`TARGET` can additionally be a single exclamation mark `!` (*not* `"!"`). When a +rule with a `!` as target matches a path, the corresponding file or directory is +ignored by the crawler instead of renamed. + +`TARGET` can also be omitted entirely. When a rule without target matches a +path, the path is returned unmodified. This is useful to prevent rules further +down from matching instead. + +Each arrow's behaviour can be modified slightly by changing the arrow's head +from `>` to `>>`. When a rule with a `>>` arrow head matches a path, it doesn't +return immediately like a normal arrow. Instead, it replaces the current path +with its output and continues on to the next rule. In effect, this means that +multiple rules can be applied sequentially. ### The `-->` arrow -The `-->` arrow is a basic renaming operation. If a path begins with `SOURCE`, -that part of the path is replaced with `TARGET`. This means that the rule -`foo/bar --> baz` would convert `foo/bar` into `baz`, but also `foo/bar/xyz` -into `baz/xyz`. The rule `foo --> !` would ignore a directory named `foo` as -well as all its contents. +The `-->` arrow is a basic renaming operation for files and directories. If a +path matches `SOURCE`, it is renamed to `TARGET`. 
+ +Example: `foo/bar --> baz` +- Doesn't match `foo`, `a/foo/bar` or `foo/baz` +- Converts `foo/bar` into `baz` +- Converts `foo/bar/wargl` into `bar/wargl` + +Example: `foo/bar --> !` +- Doesn't match `foo`, `a/foo/bar` or `foo/baz` +- Ignores `foo/bar` and any of its children ### The `-name->` arrow The `-name->` arrow lets you rename files and directories by their name, regardless of where they appear in the file tree. Because of this, its `SOURCE` must not contain multiple path segments, only a single name. This restriction -does not apply to its `TARGET`. The `-name->` arrow is not applied recursively -to its own output to prevent infinite loops. +does not apply to its `TARGET`. -For example, the rule `foo -name-> bar/baz` would convert `a/foo` into -`a/bar/baz` and `a/foo/b/c/foo` into `a/bar/baz/b/c/bar/baz`. The rule `foo --name-> !` would ignore all directories and files named `foo`. +Example: `foo -name-> bar/baz` +- Doesn't match `a/foobar/b` or `x/Foo/y/z` +- Converts `hello/foo` into `hello/bar/baz` +- Converts `foo/world` into `bar/baz/world` +- Converts `a/foo/b/c/foo` into `a/bar/baz/b/c/bar/baz` + +Example: `foo -name-> !` +- Doesn't match `a/foobar/b` or `x/Foo/y/z` +- Ignores any path containing a segment `foo` ### The `-exact->` arrow -The `-exact->` arrow requires the path to match `SOURCE` exactly. This means -that the rule `foo/bar -exact-> baz` would still convert `foo/bar` into `baz`, -but `foo/bar/xyz` would be unaffected. Also, `foo -exact-> !` would only ignore -`foo`, but not its contents (if it has any). The examples below show why this is -useful. +The `-exact->` arrow requires the path to match `SOURCE` exactly. The examples +below show why this is useful. 
+ +Example: `foo/bar -exact-> baz` +- Doesn't match `foo`, `a/foo/bar` or `foo/baz` +- Converts `foo/bar` into `baz` +- Doesn't match `foo/bar/wargl` + +Example: `foo/bar -exact-> !` +- Doesn't match `foo`, `a/foo/bar` or `foo/baz` +- Ignores only `foo/bar`, not its children ### The `-re->` arrow -The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression -that must match the entire path. If this is the case, then the capturing groups -are available in `TARGET` for formatting. +The `-re->` arrow is like the `-->` arrow but with regular expressions. `SOURCE` +is a regular expression and `TARGET` an f-string based template. If a path +matches `SOURCE`, the output path is created using `TARGET` as template. +`SOURCE` is automatically anchored. `TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can be referred to as `{g}` (e. g. `{g3}`). `{g0}` refers to the original path. @@ -288,18 +319,36 @@ can use `{i3:05}`. PFERD even allows you to write entire expressions inside the curly braces, for example `{g2.lower()}` or `{g3.replace(' ', '_')}`. +Example: `f(oo+)/be?ar -re-> B{g1.upper()}H/fear` +- Doesn't match `a/foo/bar`, `foo/abc/bar`, `afoo/bar` or `foo/bars` +- Converts `foo/bar` into `BOOH/fear` +- Converts `fooooo/bear` into `BOOOOOH/fear` +- Converts `foo/bar/baz` into `BOOH/fear/baz` + [3]: "Format String Syntax" ### The `-name-re->` arrow The `-name-re>` arrow is like a combination of the `-name->` and `-re->` arrows. -Instead of the `SOURCE` being the name of a directory or file, it's a regex that -is matched against the names of directories and files. `TARGET` works like the -`-re->` arrow's target. -For example, the arrow `(.*)\.jpeg -name-re-> {g1}.jpg` will rename all `.jpeg` -extensions into `.jpg`. The arrow `\..+ -name-re-> !` will ignore all files and -directories starting with `.`. 
+Example: `(.*)\.jpeg -name-re-> {g1}.jpg` +- Doesn't match `foo/bar.png`, `baz.JPEG` or `hello,jpeg` +- Converts `foo/bar.jpeg` into `foo/bar.jpg` +- Converts `foo.jpeg/bar/baz.jpeg` into `foo.jpg/bar/baz.jpg` + +Example: `\..+ -name-re-> !` +- Doesn't match `.`, `test`, `a.b` +- Ignores all files and directories starting with `.`. + +### The `-exact-re->` arrow + +The `-exact-re>` arrow is like a combination of the `-exact->` and `-re->` arrows. + +Example: `f(oo+)/be?ar -exactre-> B{g1.upper()}H/fear` +- Doesn't match `a/foo/bar`, `foo/abc/bar`, `afoo/bar` or `foo/bars` +- Converts `foo/bar` into `BOOH/fear` +- Converts `fooooo/bear` into `BOOOOOH/fear` +- Doesn't match `foo/bar/baz` ### Example: Tutorials @@ -327,7 +376,7 @@ The second rule is required for many crawlers since they use the rules to decide which directories to crawl. If it was missing when the crawler looks at `tutorials/`, the third rule would match. This means the crawler would not crawl the `tutorials/` directory and thus not discover that `tutorials/tut02/` -existed. +exists. Since the second rule is only relevant for crawling, the `TARGET` is left out. @@ -352,9 +401,9 @@ To do this, you can use the most powerful of arrows: The regex arrow. Note the escaped backslashes on the `SOURCE` side. -### Example: Crawl a python project +### Example: Crawl a Python project -You are crawling a python project and want to ignore all hidden files (files +You are crawling a Python project and want to ignore all hidden files (files whose name starts with a `.`), all `__pycache__` directories and all markdown files (for some weird reason). @@ -374,8 +423,7 @@ README.md ... ``` -For this task, the name arrows can be used. They are variants of the normal -arrows that only look at the file name instead of the entire path. +For this task, the name arrows can be used. ``` \..* -name-re-> ! 
From bc65ea7ab696bf3f455c49bad4ae4375a75182a8 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 9 Jun 2021 22:35:55 +0200 Subject: [PATCH 308/524] Fix mypy complaining about missing type hints --- scripts/setup | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/setup b/scripts/setup index b48fb1a..f6680bb 100755 --- a/scripts/setup +++ b/scripts/setup @@ -12,6 +12,6 @@ pip install --upgrade setuptools # Installing PFERD itself pip install --editable . -# Installing various tools -pip install --upgrade mypy flake8 autopep8 isort -pip install --upgrade pyinstaller +# Installing tools and type hints +pip install --upgrade mypy flake8 autopep8 isort pyinstaller +pip install --upgrade types-chardet types-certifi From a292c4c437d631d7eae3a0adfd98adbefd52c2eb Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 12 Jun 2021 14:57:29 +0200 Subject: [PATCH 309/524] Add example for ">>" arrow heads --- CONFIG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CONFIG.md b/CONFIG.md index 1793ddc..f2710e1 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -430,3 +430,14 @@ For this task, the name arrows can be used. __pycache__ -name-> ! .*\.md -name-re-> ! ``` + +### Example: Clean up names + +You want to convert all paths into lowercase and replace spaces with underscores +before applying any rules. This can be achieved using the `>>` arrow heads. + +``` +(.*) -re->> "{g1.lower().replace(' ', '_')}" + + +``` From 601e4b936b320e766c0de18d384a92a5750f72b9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 12 Jun 2021 15:00:52 +0200 Subject: [PATCH 310/524] Use new arrow logic in README example config --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d25e86f..681bdf7 100644 --- a/README.md +++ b/README.md @@ -116,17 +116,18 @@ transform = Online-Tests --> ! Vorlesungswerbung --> ! 
+ # Rename folders + Lehrbücher --> Vorlesung + # Note the ">>" arrow head which lets us apply further rules to files moved to "Übung" + Übungsunterlagen -->> Übung + # Move exercises to own folder. Rename them to "Blatt-XX.pdf" to make them sort properly - "Übungsunterlagen/(\d+). Übungsblatt.pdf" -re-> Blätter/Blatt-{i1:02}.pdf + "Übung/(\d+). Übungsblatt.pdf" -re-> Blätter/Blatt-{i1:02}.pdf # Move solutions to own folder. Rename them to "Blatt-XX-Lösung.pdf" to make them sort properly - "Übungsunterlagen/(\d+). Übungsblatt.*Musterlösung.pdf" -re-> Blätter/Blatt-{i1:02}-Lösung.pdf + "Übung/(\d+). Übungsblatt.*Musterlösung.pdf" -re-> Blätter/Blatt-{i1:02}-Lösung.pdf # The course has nested folders with the same name - flatten them - "Übungsunterlagen/(.+?)/\\1/(.*)" -re-> Übung/{g1}/{g2} - - # Rename remaining folders - Übungsunterlagen --> Übung - Lehrbücher --> Vorlesung + "Übung/(.+?)/\\1" -re-> Übung/{g1} [crawl:Bar] type = kit-ilias-web From 70b33ecfd9ca3230303cc17f39fd8bc634737e2b Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 13 Jun 2021 15:06:50 +0200 Subject: [PATCH 311/524] Add migration notes to changelog Also clean up some other formatting for consistency --- CHANGELOG.md | 5 +++++ CONFIG.md | 6 +++--- README.md | 6 +++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffc6e81..d6049d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,11 @@ ambiguous situations. ## Unreleased +If your config file doesn't do weird things with transforms, it should continue +to work. If your `-re->` arrows behave weirdly, try replacing them with +`-exact-re->` arrows. If you're on Windows, you might need to switch from `\` +path separators to `/` in your regex rules. 
+ ### Added - `skip` option for crawlers - Rules with `>>` instead of `>` as arrow head diff --git a/CONFIG.md b/CONFIG.md index f2710e1..19afbd2 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -342,7 +342,8 @@ Example: `\..+ -name-re-> !` ### The `-exact-re->` arrow -The `-exact-re>` arrow is like a combination of the `-exact->` and `-re->` arrows. +The `-exact-re>` arrow is like a combination of the `-exact->` and `-re->` +arrows. Example: `f(oo+)/be?ar -exactre-> B{g1.upper()}H/fear` - Doesn't match `a/foo/bar`, `foo/abc/bar`, `afoo/bar` or `foo/bars` @@ -375,8 +376,7 @@ tutorials --> ! The second rule is required for many crawlers since they use the rules to decide which directories to crawl. If it was missing when the crawler looks at `tutorials/`, the third rule would match. This means the crawler would not crawl -the `tutorials/` directory and thus not discover that `tutorials/tut02/` -exists. +the `tutorials/` directory and thus not discover that `tutorials/tut02/` exists. Since the second rule is only relevant for crawling, the `TARGET` is left out. diff --git a/README.md b/README.md index 681bdf7..836147f 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,9 @@ The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. ## Basic usage -PFERD can be run directly from the command line with no config file. -Run `pferd -h` to get an overview of available commands and options. -Run `pferd -h` to see which options a command has. +PFERD can be run directly from the command line with no config file. Run `pferd +-h` to get an overview of available commands and options. Run `pferd +-h` to see which options a command has. 
For example, you can download your personal desktop from the KIT ILIAS like this: From 70ec64a48ba8a56a819dfdbacba974f108d1206e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 13 Jun 2021 15:39:22 +0200 Subject: [PATCH 312/524] Fix wrong base URL for multi-stage pages --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 2 +- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6049d2..c09f921 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,9 @@ path separators to `/` in your regex rules. - Use the label to the left for exercises instead of the button name to determine the folder name +### Fixed +- Video pagination handling in ILIAS crawler + ## 3.0.1 - 2021-06-01 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index db9a303..384f0de 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -480,7 +480,7 @@ class IliasPage: return None if "opencast" in str(img_tag["alt"]).lower(): - return IliasElementType.VIDEO_FOLDER + return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED if str(img_tag["src"]).endswith("icon_exc.svg"): return IliasElementType.EXERCISE diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 78428e0..6495da9 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -253,7 +253,7 @@ instance's greatest bottleneck. 
soup = await self._get_page(next_stage_url) log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") log.explain(f"URL: {next_stage_url}") - page = IliasPage(soup, url, parent) + page = IliasPage(soup, next_stage_url, parent) next_stage_url = page.get_next_stage_url() elements.extend(page.get_child_elements()) From 57aef262179f72795e30f1c93254a32f084c0e23 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 13 Jun 2021 16:32:22 +0200 Subject: [PATCH 313/524] Fix name arrows I seem to have (re-)implemented them incorrectly and never tested them. --- PFERD/transformer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index bf51d6a..a37443a 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -142,18 +142,23 @@ class RenamingPartsTf(Transformation): def transform(self, path: PurePath) -> TransformResult: result = PurePath() + any_part_matched = False for part in path.parts: transformed = self.sub_tf.transform(PurePath(part)) if not transformed: result /= part elif isinstance(transformed, Transformed): result /= transformed.path + any_part_matched = True elif isinstance(transformed, Ignored): return transformed else: raise RuntimeError(f"Invalid transform result of type {type(transformed)}: {transformed}") - return None + if any_part_matched: + return Transformed(result) + else: + return None class RuleParseError(Exception): From 6e4d423c812c52aff95249ad992dc4889d971208 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 13 Jun 2021 16:50:29 +0200 Subject: [PATCH 314/524] Crawl all video stages in one crawl bar This ensures folders are not renamed, as they are crawled twice --- PFERD/crawl/ilias/kit_ilias_html.py | 6 ++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 9 +++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 384f0de..41f45e2 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py 
+++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -62,9 +62,11 @@ class IliasPage: log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() - def get_next_stage_url(self) -> Optional[str]: + def get_next_stage_element(self) -> Optional[IliasPageElement]: if self._is_ilias_opencast_embedding(): - return self.get_child_elements()[0].url + return self.get_child_elements()[0] + if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: + return self._find_video_entries_paginated()[0] return None def _is_video_player(self) -> bool: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 6495da9..41c301c 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -248,13 +248,18 @@ instance's greatest bottleneck. elements.clear() async with cl: next_stage_url: Optional[str] = url + current_parent = parent while next_stage_url: soup = await self._get_page(next_stage_url) log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") log.explain(f"URL: {next_stage_url}") - page = IliasPage(soup, next_stage_url, parent) - next_stage_url = page.get_next_stage_url() + page = IliasPage(soup, next_stage_url, current_parent) + if next_element := page.get_next_stage_element(): + current_parent = next_element + next_stage_url = next_element.url + else: + next_stage_url = None elements.extend(page.get_child_elements()) From 75fde870c2cc4b0f8b87c80cae87e61f9379ddd2 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 13 Jun 2021 17:23:18 +0200 Subject: [PATCH 315/524] Bump version to 3.1.0 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c09f921..427219e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. 
## Unreleased +## 3.1.0 - 2021-06-13 + If your config file doesn't do weird things with transforms, it should continue to work. If your `-re->` arrows behave weirdly, try replacing them with `-exact-re->` arrows. If you're on Windows, you might need to switch from `\` diff --git a/PFERD/version.py b/PFERD/version.py index 2aae99d..8ce7ae4 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.0.1" +VERSION = "3.1.0" From 80eeb8fe97e28437dcce0e148ffba202fde6a156 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 1 Jul 2021 11:01:55 +0200 Subject: [PATCH 316/524] Add --skip option --- PFERD/__main__.py | 2 +- PFERD/cli/parser.py | 8 ++++++++ PFERD/pferd.py | 24 +++++++++++++++++++----- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index b274b6b..b665feb 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -116,7 +116,7 @@ def main() -> None: sys.exit() try: - pferd = Pferd(config, args.crawler) + pferd = Pferd(config, args.crawler, args.skip) except PferdLoadError as e: log.unlock() log.error(str(e)) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index f5fb215..e753023 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -181,6 +181,14 @@ PARSER.add_argument( help="only execute a single crawler." " Can be specified multiple times to execute multiple crawlers" ) +PARSER.add_argument( + "--skip", "-S", + action="append", + type=str, + metavar="NAME", + help="don't execute this particular crawler." 
+ " Can be specified multiple times to skip multiple crawlers" +) PARSER.add_argument( "--working-dir", type=Path, diff --git a/PFERD/pferd.py b/PFERD/pferd.py index d98b426..726ed45 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -15,13 +15,13 @@ class PferdLoadError(Exception): class Pferd: - def __init__(self, config: Config, cli_crawlers: Optional[List[str]]): + def __init__(self, config: Config, cli_crawlers: Optional[List[str]], cli_skips: Optional[List[str]]): """ May throw PferdLoadError. """ self._config = config - self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers) + self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers, cli_skips) self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} @@ -65,16 +65,30 @@ class Pferd: return crawlers_to_run - def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: + def _find_crawlers_to_run( + self, + config: Config, + cli_crawlers: Optional[List[str]], + cli_skips: Optional[List[str]], + ) -> List[str]: log.explain_topic("Deciding which crawlers to run") + crawlers: List[str] if cli_crawlers is None: log.explain("No crawlers specified on CLI") log.explain("Running crawlers specified in config") - return self._find_config_crawlers(config) + crawlers = self._find_config_crawlers(config) else: log.explain("Crawlers specified on CLI") - return self._find_cli_crawlers(config, cli_crawlers) + crawlers = self._find_cli_crawlers(config, cli_crawlers) + + skips = {f"crawl:{name}" for name in cli_skips} if cli_skips else set() + for crawler in crawlers: + if crawler in skips: + log.explain(f"Skipping crawler {crawler!r}") + crawlers = [crawler for crawler in crawlers if crawler not in skips] + + return crawlers def _load_authenticators(self) -> None: for name, section in self._config.auth_sections(): From 9ffd6033575ed0ed603663e60bd00b8adb5b8295 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 1 Jul 2021 
11:14:50 +0200 Subject: [PATCH 317/524] Error when using multiple segments with -name-> Previously, PFERD just silently never matched the -name-> arrow. Now, it errors when loading the config file. --- PFERD/transformer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index a37443a..1a56e27 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -41,9 +41,11 @@ TransformResult = Optional[Union[Transformed, Ignored]] @dataclass class Rule: left: str + left_index: int name: str head: ArrowHead right: RightSide + right_index: int def right_result(self, path: PurePath) -> Union[str, Transformed, Ignored]: if isinstance(self.right, str): @@ -345,6 +347,7 @@ def parse_eol(line: Line) -> None: def parse_rule(line: Line) -> Rule: parse_zero_or_more_spaces(line) + left_index = line.index left = parse_left(line) parse_one_or_more_spaces(line) @@ -354,19 +357,19 @@ def parse_rule(line: Line) -> Rule: line.expect("-") head = parse_arrow_head(line) - index = line.index + right_index = line.index right: RightSide try: parse_zero_or_more_spaces(line) parse_eol(line) right = Empty() except RuleParseError: - line.index = index + line.index = right_index parse_one_or_more_spaces(line) right = parse_right(line) parse_eol(line) - return Rule(left, name, head, right) + return Rule(left, left_index, name, head, right, right_index) def parse_transformation(line: Line) -> Transformation: @@ -377,6 +380,9 @@ def parse_transformation(line: Line) -> Transformation: elif rule.name == "exact": return ExactTf(rule) elif rule.name == "name": + if len(PurePath(rule.left).parts) > 1: + line.index = rule.left_index + raise RuleParseError(line, "Expected name, not multiple segments") return RenamingPartsTf(ExactTf(rule)) elif rule.name == "re": return RenamingParentsTf(ExactReTf(rule)) From 91200f3684973f40d6409ce38368eceb6e73da0f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 3 Jul 2021 12:07:18 +0200 
Subject: [PATCH 318/524] Fix nondeterministic name deduplication --- PFERD/crawl/crawler.py | 8 +- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 145 +++++++++++++-------- 2 files changed, 93 insertions(+), 60 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index d61783f..d798bc3 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -56,7 +56,7 @@ def noncritical(f: Wrapped) -> Wrapped: return wrapper # type: ignore -AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) +AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) def anoncritical(f: AWrapped) -> AWrapped: @@ -72,14 +72,14 @@ def anoncritical(f: AWrapped) -> AWrapped: Warning: Must only be applied to member functions of the Crawler class! """ - async def wrapper(*args: Any, **kwargs: Any) -> None: + async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]: if not (args and isinstance(args[0], Crawler)): raise RuntimeError("@anoncritical must only applied to Crawler methods") crawler = args[0] try: - await f(*args, **kwargs) + return await f(*args, **kwargs) except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: log.warn(str(e)) crawler.error_free = False @@ -87,6 +87,8 @@ def anoncritical(f: AWrapped) -> AWrapped: crawler.error_free = False raise + return None + return wrapper # type: ignore diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 41c301c..a61eb4e 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -12,7 +12,7 @@ from ...config import Config from ...logging import ProgressBar, log from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param -from ..crawler import CrawlError, CrawlWarning, anoncritical +from ..crawler import CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, 
HttpCrawlerSection from .file_templates import Links from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement @@ -81,17 +81,16 @@ _VIDEO_ELEMENTS: Set[IliasElementType] = set([ IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, ]) -AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) +AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: def decorator(f: AWrapped) -> AWrapped: - async def wrapper(*args: Any, **kwargs: Any) -> None: + async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]: last_exception: Optional[BaseException] = None for round in range(attempts): try: - await f(*args, **kwargs) - return + return await f(*args, **kwargs) except aiohttp.ContentTypeError: # invalid content type raise CrawlWarning("ILIAS returned an invalid content type") except aiohttp.TooManyRedirects: @@ -230,17 +229,33 @@ instance's greatest bottleneck. # Fill up our task list with the found elements await gather_elements() - tasks = [self._handle_ilias_element(PurePath("."), element) for element in elements] + + tasks: List[Awaitable[None]] = [] + for element in elements: + if handle := await self._handle_ilias_element(PurePath("."), element): + tasks.append(asyncio.create_task(handle)) # And execute them await self.gather(tasks) - async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: + async def _handle_ilias_page( + self, + url: str, + parent: IliasPageElement, + path: PurePath, + ) -> Optional[Awaitable[None]]: maybe_cl = await self.crawl(path) if not maybe_cl: - return - cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + return None + return self._crawl_ilias_page(url, parent, path, maybe_cl) + async def _crawl_ilias_page( + self, + url: str, + parent: IliasPageElement, + path: PurePath, + cl: CrawlToken, + ) -> None: elements: List[IliasPageElement] = [] 
@_iorepeat(3, "crawling folder") @@ -265,7 +280,11 @@ instance's greatest bottleneck. # Fill up our task list with the found elements await gather_elements() - tasks = [self._handle_ilias_element(cl.path, element) for element in elements] + + tasks: List[Awaitable[None]] = [] + for element in elements: + if handle := await self._handle_ilias_element(cl.path, element): + tasks.append(asyncio.create_task(handle)) # And execute them await self.gather(tasks) @@ -274,7 +293,11 @@ instance's greatest bottleneck. # Shouldn't happen but we also really don't want to let I/O errors bubble up to anoncritical. # If that happens we will be terminated as anoncritical doesn't tream them as non-critical. @_wrap_io_in_warning("handling ilias element") - async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: + async def _handle_ilias_element( + self, + parent_path: PurePath, + element: IliasPageElement, + ) -> Optional[Awaitable[None]]: element_path = PurePath(parent_path, element.name) if element.type in _VIDEO_ELEMENTS: @@ -282,35 +305,41 @@ instance's greatest bottleneck. 
if not self._videos: log.explain("Video crawling is disabled") log.explain("Answer: no") - return + return None else: log.explain("Video crawling is enabled") log.explain("Answer: yes") if element.type == IliasElementType.FILE: - await self._download_file(element, element_path) + return await self._handle_file(element, element_path) elif element.type == IliasElementType.FORUM: log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Forums are not supported") log.explain("Answer: No") + return None elif element.type == IliasElementType.TEST: log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Tests contain no relevant files") log.explain("Answer: No") + return None elif element.type == IliasElementType.LINK: - await self._download_link(element, element_path) + return await self._handle_link(element, element_path) elif element.type == IliasElementType.VIDEO: - await self._download_file(element, element_path) + return await self._handle_file(element, element_path) elif element.type == IliasElementType.VIDEO_PLAYER: - await self._download_video(element, element_path) + return await self._handle_video(element, element_path) elif element.type in _DIRECTORY_PAGES: - await self._handle_ilias_page(element.url, element, element_path) + return await self._handle_ilias_page(element.url, element, element_path) else: # This will retry it a few times, failing everytime. It doesn't make any network # requests, so that's fine. raise CrawlWarning(f"Unknown element type: {element.type!r}") - async def _download_link(self, element: IliasPageElement, element_path: PurePath) -> None: + async def _handle_link( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Awaitable[None]]: log.explain_topic(f"Decision: Crawl Link {fmt_path(element_path)}") log.explain(f"Links type is {self._links}") @@ -318,32 +347,30 @@ instance's greatest bottleneck. 
link_extension = self._links.extension() if not link_template_maybe or not link_extension: log.explain("Answer: No") - return + return None else: log.explain("Answer: Yes") - link_template = link_template_maybe element_path = element_path.with_name(element_path.name + link_extension) maybe_dl = await self.download(element_path, mtime=element.mtime) if not maybe_dl: - return - dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + return None - @_iorepeat(3, "resolving link") - async def impl() -> None: - async with dl as (bar, sink): - export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") - real_url = await self._resolve_link_target(export_url) + return self._download_link(element, link_template_maybe, maybe_dl) - content = link_template - content = content.replace("{{link}}", real_url) - content = content.replace("{{name}}", element.name) - content = content.replace("{{description}}", str(element.description)) - content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) - sink.file.write(content.encode("utf-8")) - sink.done() + @_iorepeat(3, "resolving link") + async def _download_link(self, element: IliasPageElement, link_template: str, dl: DownloadToken) -> None: + async with dl as (bar, sink): + export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") + real_url = await self._resolve_link_target(export_url) - await impl() + content = link_template + content = content.replace("{{link}}", real_url) + content = content.replace("{{name}}", element.name) + content = content.replace("{{description}}", str(element.description)) + content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) + sink.file.write(content.encode("utf-8")) + sink.done() async def _resolve_link_target(self, export_url: str) -> str: async with self.session.get(export_url, allow_redirects=False) as resp: @@ -360,39 +387,43 @@ instance's greatest bottleneck. 
raise CrawlError("resolve_link_target failed even after authenticating") - async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: + async def _handle_video( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Awaitable[None]]: # Videos will NOT be redownloaded - their content doesn't really change and they are chunky maybe_dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) if not maybe_dl: - return - dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + return None - @_iorepeat(3, "downloading video") - async def impl() -> None: - assert dl # The function is only reached when dl is not None - async with dl as (bar, sink): - page = IliasPage(await self._get_page(element.url), element.url, element) - real_element = page.get_child_elements()[0] + return self._download_video(element, maybe_dl) - log.explain(f"Streaming video from real url {real_element.url}") + @_iorepeat(3, "downloading video") + async def _download_video(self, element: IliasPageElement, dl: DownloadToken) -> None: + async with dl as (bar, sink): + page = IliasPage(await self._get_page(element.url), element.url, element) + real_element = page.get_child_elements()[0] - await self._stream_from_url(real_element.url, sink, bar, is_video=True) + log.explain(f"Streaming video from real url {real_element.url}") - await impl() + await self._stream_from_url(real_element.url, sink, bar, is_video=True) - async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: + async def _handle_file( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Awaitable[None]]: maybe_dl = await self.download(element_path, mtime=element.mtime) if not maybe_dl: - return - dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + return None + return self._download_file(element, maybe_dl) - 
@_iorepeat(3, "downloading file") - async def impl() -> None: - assert dl # The function is only reached when dl is not None - async with dl as (bar, sink): - await self._stream_from_url(element.url, sink, bar, is_video=False) - - await impl() + @_iorepeat(3, "downloading file") + async def _download_file(self, element: IliasPageElement, dl: DownloadToken) -> None: + assert dl # The function is only reached when dl is not None + async with dl as (bar, sink): + await self._stream_from_url(element.url, sink, bar, is_video=False) async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar, is_video: bool) -> None: async def try_stream() -> bool: From 89be07d4d3562c75f10539c7a51c171933d3de82 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 3 Jul 2021 17:05:48 +0200 Subject: [PATCH 319/524] Use final crawl path in HTML parsing message --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index a61eb4e..83cac32 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -247,13 +247,12 @@ instance's greatest bottleneck. maybe_cl = await self.crawl(path) if not maybe_cl: return None - return self._crawl_ilias_page(url, parent, path, maybe_cl) + return self._crawl_ilias_page(url, parent, maybe_cl) async def _crawl_ilias_page( self, url: str, parent: IliasPageElement, - path: PurePath, cl: CrawlToken, ) -> None: elements: List[IliasPageElement] = [] @@ -267,7 +266,7 @@ instance's greatest bottleneck. 
while next_stage_url: soup = await self._get_page(next_stage_url) - log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") + log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}") log.explain(f"URL: {next_stage_url}") page = IliasPage(soup, next_stage_url, current_parent) if next_element := page.get_next_stage_element(): From 8ec3f41251cf69a365c9009400e67d539bb4afc4 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 6 Jul 2021 16:13:23 +0200 Subject: [PATCH 320/524] Crawl ilias booking objects as links --- PFERD/crawl/ilias/kit_ilias_html.py | 4 ++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 58 +++++++++++++++++++--- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 41f45e2..247002b 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -22,6 +22,7 @@ class IliasElementType(Enum): FOLDER = "folder" FORUM = "forum" LINK = "link" + BOOKING = "booking" MEETING = "meeting" VIDEO = "video" VIDEO_PLAYER = "video_player" @@ -490,6 +491,9 @@ class IliasPage: if str(img_tag["src"]).endswith("icon_webr.svg"): return IliasElementType.LINK + if str(img_tag["src"]).endswith("icon_book.svg"): + return IliasElementType.BOOKING + if str(img_tag["src"]).endswith("frm.svg"): return IliasElementType.FORUM diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 83cac32..a0e323b 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -323,6 +323,8 @@ instance's greatest bottleneck. 
return None elif element.type == IliasElementType.LINK: return await self._handle_link(element, element_path) + elif element.type == IliasElementType.BOOKING: + return await self._handle_booking(element, element_path) elif element.type == IliasElementType.VIDEO: return await self._handle_file(element, element_path) elif element.type == IliasElementType.VIDEO_PLAYER: @@ -362,14 +364,56 @@ instance's greatest bottleneck. async with dl as (bar, sink): export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") real_url = await self._resolve_link_target(export_url) + self._write_link_content(link_template, real_url, element.name, element.description, sink) - content = link_template - content = content.replace("{{link}}", real_url) - content = content.replace("{{name}}", element.name) - content = content.replace("{{description}}", str(element.description)) - content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) - sink.file.write(content.encode("utf-8")) - sink.done() + def _write_link_content( + self, + link_template: str, + url: str, + name: str, + description: Optional[str], + sink: FileSink, + ) -> None: + content = link_template + content = content.replace("{{link}}", url) + content = content.replace("{{name}}", name) + content = content.replace("{{description}}", str(description)) + content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) + sink.file.write(content.encode("utf-8")) + sink.done() + + async def _handle_booking( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Awaitable[None]]: + log.explain_topic(f"Decision: Crawl Booking Link {fmt_path(element_path)}") + log.explain(f"Links type is {self._links}") + + link_template_maybe = self._links.template() + link_extension = self._links.extension() + if not link_template_maybe or not link_extension: + log.explain("Answer: No") + return None + else: + log.explain("Answer: Yes") + element_path = 
element_path.with_name(element_path.name + link_extension) + + maybe_dl = await self.download(element_path, mtime=element.mtime) + if not maybe_dl: + return None + + return self._download_booking(element, link_template_maybe, maybe_dl) + + @_iorepeat(3, "resolving booking") + async def _download_booking( + self, + element: IliasPageElement, + link_template: str, + dl: DownloadToken, + ) -> None: + async with dl as (bar, sink): + self._write_link_content(link_template, element.url, element.name, element.description, sink) async def _resolve_link_target(self, export_url: str) -> str: async with self.session.get(export_url, allow_redirects=False) as resp: From ee67f9f4725be9f418d66b85bb8a749de8e5d713 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 6 Jul 2021 17:45:12 +0200 Subject: [PATCH 321/524] Sort elements by ILIAS id to ensure deterministic ordering --- PFERD/crawl/ilias/kit_ilias_html.py | 11 +++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 247002b..7e91926 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -38,6 +38,17 @@ class IliasPageElement: mtime: Optional[datetime] = None description: Optional[str] = None + def id(self) -> str: + regexes = [r"eid=(?P[0-9a-z\-]+)", r"file_(?P\d+)", r"ref_id=(?P\d+)"] + + for regex in regexes: + if match := re.search(regex, self.url): + return match.groupdict()["id"] + + # Fall back to URL + log.warn(f"Didn't find identity for {self.name} - {self.url}. Please report this.") + return self.url + class IliasPage: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index a0e323b..cca6987 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -230,6 +230,8 @@ instance's greatest bottleneck. 
# Fill up our task list with the found elements await gather_elements() + elements.sort(key=lambda e: e.id()) + tasks: List[Awaitable[None]] = [] for element in elements: if handle := await self._handle_ilias_element(PurePath("."), element): @@ -280,6 +282,8 @@ instance's greatest bottleneck. # Fill up our task list with the found elements await gather_elements() + elements.sort(key=lambda e: e.id()) + tasks: List[Awaitable[None]] = [] for element in elements: if handle := await self._handle_ilias_element(cl.path, element): From 86f79ff1f137f6f728df08a51b12acb096e00979 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 7 Jul 2021 14:26:20 +0200 Subject: [PATCH 322/524] Update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 427219e..20dd53c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,19 @@ ambiguous situations. ## Unreleased +### Added +- `--skip` command line option +- Support for ILIAS booking objects + +### Changed +- Using multiple path segments on left side of `-name->` now results in an + error. This was already forbidden by the documentation but silently accepted + by PFERD. +- More consistent path printing in some `--explain` messages + +### Fixed +- Nondeterministic name deduplication due to ILIAS reordering elements + ## 3.1.0 - 2021-06-13 If your config file doesn't do weird things with transforms, it should continue From 544d45cbc570080964ab50044301b304343f9a31 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 13 Jul 2021 15:42:11 +0200 Subject: [PATCH 323/524] Catch non-critical exceptions at crawler top level --- CHANGELOG.md | 1 + PFERD/crawl/crawler.py | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20dd53c..181ef99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ ambiguous situations. 
### Fixed - Nondeterministic name deduplication due to ILIAS reordering elements +- More exceptions are handled properly ## 3.1.0 - 2021-06-13 diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index d798bc3..c492ee9 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -320,6 +320,7 @@ class Crawler(ABC): log.explain("Warnings or errors occurred during this run") log.explain("Answer: No") + @anoncritical async def run(self) -> None: """ Start the crawling process. Call this function if you want to use a From 742632ed8d6cebd10c7e28902afba2fccb108712 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 4 Aug 2021 18:27:26 +0000 Subject: [PATCH 324/524] Bump version to 3.2.0 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 181ef99..1ac3a8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.2.0 - 2021-08-04 + ### Added - `--skip` command line option - Support for ILIAS booking objects diff --git a/PFERD/version.py b/PFERD/version.py index 8ce7ae4..b8efadd 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.1.0" +VERSION = "3.2.0" From 66730773977a2602aebd5396efc1c6d8bd7b0ad7 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 21 Oct 2021 12:01:41 +0200 Subject: [PATCH 325/524] Add kit-ipd crawler --- CHANGELOG.md | 1 + CONFIG.md | 7 ++ PFERD/cli/__init__.py | 1 + PFERD/cli/command_kit_ipd.py | 46 +++++++++++ PFERD/crawl/__init__.py | 3 + PFERD/crawl/kit_ipd_crawler.py | 138 +++++++++++++++++++++++++++++++++ 6 files changed, 196 insertions(+) create mode 100644 PFERD/cli/command_kit_ipd.py create mode 100644 PFERD/crawl/kit_ipd_crawler.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ac3a8d..cca4839 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ ambiguous situations. 
### Added - `--skip` command line option - Support for ILIAS booking objects +- A KIT IPD crawler ### Changed - Using multiple path segments on left side of `-name->` now results in an diff --git a/CONFIG.md b/CONFIG.md index 19afbd2..06b9246 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -136,6 +136,13 @@ crawler simulate a slower, network-based crawler. requests. (Default: `0.0`) - `download_speed`: Download speed (in bytes per second) to simulate. (Optional) +### The `kit-ipd` crawler + +This crawler crals a KIT ipd page by url. The root page can be crawled from +outside the KIT network so you will be informed about any new/deleted files, +but downloading files requires you to be within. Adding a show delay between +requests is likely a good idea. + ### The `kit-ilias-web` crawler This crawler crawls the KIT ILIAS instance. diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py index d70ecd9..efa8f00 100644 --- a/PFERD/cli/__init__.py +++ b/PFERD/cli/__init__.py @@ -9,4 +9,5 @@ from . import command_local # noqa: F401 imported but unused from . import command_kit_ilias_web # noqa: F401 imported but unused +from . 
import command_kit_ipd # noqa: F401 imported but unused from .parser import PARSER, ParserLoadError, load_default_section # noqa: F401 imported but unused diff --git a/PFERD/cli/command_kit_ipd.py b/PFERD/cli/command_kit_ipd.py new file mode 100644 index 0000000..480cc9b --- /dev/null +++ b/PFERD/cli/command_kit_ipd.py @@ -0,0 +1,46 @@ +import argparse +import configparser +from pathlib import Path + +from ..logging import log +from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler + +SUBPARSER = SUBPARSERS.add_parser( + "kit-ipd", + parents=[CRAWLER_PARSER], +) + +GROUP = SUBPARSER.add_argument_group( + title="kit ipd crawler arguments", + description="arguments for the 'kit-ipd' crawler", +) +GROUP.add_argument( + "target", + type=str, + metavar="TARGET", + help="url to crawl" +) +GROUP.add_argument( + "output", + type=Path, + metavar="OUTPUT", + help="output directory" +) + + +def load( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + log.explain("Creating config for command 'kit-ipd'") + + parser["crawl:kit-ipd"] = {} + section = parser["crawl:ipd"] + load_crawler(args, section) + + section["type"] = "kit-ipd" + section["target"] = str(args.target) + section["output_dir"] = str(args.output) + + +SUBPARSER.set_defaults(command=load) diff --git a/PFERD/crawl/__init__.py b/PFERD/crawl/__init__.py index 7eb2fb1..1f8bd59 100644 --- a/PFERD/crawl/__init__.py +++ b/PFERD/crawl/__init__.py @@ -5,6 +5,7 @@ from ..auth import Authenticator from ..config import Config from .crawler import Crawler, CrawlError, CrawlerSection # noqa: F401 from .ilias import KitIliasWebCrawler, KitIliasWebCrawlerSection +from .kit_ipd_crawler import KitIpdCrawler, KitIpdCrawlerSection from .local_crawler import LocalCrawler, LocalCrawlerSection CrawlerConstructor = Callable[[ @@ -19,4 +20,6 @@ CRAWLERS: Dict[str, CrawlerConstructor] = { LocalCrawler(n, LocalCrawlerSection(s), c), "kit-ilias-web": lambda n, s, c, a: KitIliasWebCrawler(n, 
KitIliasWebCrawlerSection(s), c, a), + "kit-ipd": lambda n, s, c, a: + KitIpdCrawler(n, KitIpdCrawlerSection(s), c), } diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py new file mode 100644 index 0000000..4d4addd --- /dev/null +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -0,0 +1,138 @@ +import os +from dataclasses import dataclass +from pathlib import PurePath +from typing import List, Set, Union +from urllib.parse import urljoin + +from bs4 import BeautifulSoup, Tag + +from ..config import Config +from ..logging import ProgressBar, log +from ..output_dir import FileSink +from ..utils import soupify +from .crawler import CrawlError +from .http_crawler import HttpCrawler, HttpCrawlerSection + + +class KitIpdCrawlerSection(HttpCrawlerSection): + def target(self) -> str: + target = self.s.get("target") + if not target: + self.missing_value("target") + + if not target.startswith("https://"): + self.invalid_value("target", target, "Should be a URL") + + return target + + +@dataclass +class KitIpdFile: + name: str + url: str + + +@dataclass +class KitIpdFolder: + name: str + files: List[KitIpdFile] + + +class KitIpdCrawler(HttpCrawler): + + def __init__( + self, + name: str, + section: KitIpdCrawlerSection, + config: Config, + ): + super().__init__(name, section, config) + self._url = section.target() + + async def _run(self) -> None: + maybe_cl = await self.crawl(PurePath(".")) + if not maybe_cl: + return + + folders: List[KitIpdFolder] = [] + + async with maybe_cl: + folder_tags = await self._fetch_folder_tags() + folders = [self._extract_folder(tag) for tag in folder_tags] + + tasks = [self._crawl_folder(folder) for folder in folders] + + await self.gather(tasks) + + async def _crawl_folder(self, folder: KitIpdFolder) -> None: + path = PurePath(folder.name) + if not await self.crawl(path): + return + + tasks = [self._download_file(path, file) for file in folder.files] + + await self.gather(tasks) + + async def _download_file(self, parent: 
PurePath, file: KitIpdFile) -> None: + element_path = parent / file.name + maybe_dl = await self.download(element_path) + if not maybe_dl: + return + + async with maybe_dl as (bar, sink): + await self._stream_from_url(file.url, sink, bar) + + async def _fetch_folder_tags(self) -> Set[Tag]: + page = await self.get_page() + elements: List[Tag] = self._find_file_links(page) + folder_tags: Set[Tag] = set() + + for element in elements: + enclosing_data: Tag = element.findParent(name="td") + label: Tag = enclosing_data.findPreviousSibling(name="td") + folder_tags.add(label) + + return folder_tags + + def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: + name = folder_tag.getText().strip() + files: List[KitIpdFile] = [] + + container: Tag = folder_tag.findNextSibling(name="td") + for link in self._find_file_links(container): + files.append(self._extract_file(link)) + + log.explain_topic(f"Found folder {name!r}") + for file in files: + log.explain(f"Found file {file.name!r}") + + return KitIpdFolder(name, files) + + def _extract_file(self, link: Tag) -> KitIpdFile: + name = link.getText().strip() + url = self._abs_url_from_link(link) + _, extension = os.path.splitext(url) + return KitIpdFile(name + extension, url) + + def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]: + return tag.findAll(name="a", attrs={"href": lambda x: x and "intern" in x}) + + def _abs_url_from_link(self, link_tag: Tag) -> str: + return urljoin(self._url, link_tag.get("href")) + + async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: + async with self.session.get(url, allow_redirects=False) as resp: + if resp.status == 403: + raise CrawlError("Received a 403. 
Are you within the KIT network/VPN?") + if resp.content_length: + bar.set_total(resp.content_length) + + async for data in resp.content.iter_chunked(1024): + sink.file.write(data) + bar.advance(len(data)) + + sink.done() + + async def get_page(self) -> BeautifulSoup: + async with self.session.get(self._url) as request: + return soupify(await request.read()) From fee12b3d9e8469d37b972f28d84a7d44538744bc Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 25 Oct 2021 17:44:12 +0000 Subject: [PATCH 326/524] Fix changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cca4839..522d96d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,12 +22,14 @@ ambiguous situations. ## Unreleased +### Added +- A KIT IPD crawler + ## 3.2.0 - 2021-08-04 ### Added - `--skip` command line option - Support for ILIAS booking objects -- A KIT IPD crawler ### Changed - Using multiple path segments on left side of `-name->` now results in an From 55ea304ff338f249914b95938675a4e9b07d0875 Mon Sep 17 00:00:00 2001 From: lukasprobst Date: Mon, 25 Oct 2021 22:32:54 +0200 Subject: [PATCH 327/524] Disable interpolation of ConfigParser --- CHANGELOG.md | 3 +++ CONFIG.md | 6 +++--- LICENSE | 2 +- PFERD/__main__.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 522d96d..a90c978 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ ambiguous situations. ### Added - A KIT IPD crawler +### Removed +- [Interpolation](https://docs.python.org/3/library/configparser.html#interpolation-of-values) in config file + ## 3.2.0 - 2021-08-04 ### Added diff --git a/CONFIG.md b/CONFIG.md index 06b9246..4d2ec33 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -4,11 +4,11 @@ A config file consists of sections. A section begins with a `[section]` header, which is followed by a list of `key = value` pairs. Comments must be on their own line and start with `#`. 
Multiline values must be indented beyond their key. Boolean values can be `yes` or `no`. For more details and some examples on the -format, see the [configparser documentation][1] ([basic interpolation][2] is -enabled). +format, see the [configparser documentation][1] ([interpolation][2] is +disabled). [1]: "Supported INI File Structure" -[2]: "BasicInterpolation" +[2]: "Interpolation of values" ## The `DEFAULT` section diff --git a/LICENSE b/LICENSE index 01f15f5..c096c4a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim +Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim, thelukasprobst Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/__main__.py b/PFERD/__main__.py index b665feb..bdf5b34 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -15,7 +15,7 @@ from .transformer import RuleParseError def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser: log.explain_topic("Loading config") - parser = configparser.ConfigParser() + parser = configparser.ConfigParser(interpolation=None) if args.command is None: log.explain("No CLI command specified, loading config from file") From ef7d5ea2d3282e71cf0ba82698e409483cc1ad0a Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 30 Oct 2021 18:09:05 +0200 Subject: [PATCH 328/524] Allow storing crawler-specific data in reports --- PFERD/report.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/PFERD/report.py b/PFERD/report.py index 919bb35..99a4661 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -1,6 +1,6 @@ import json from pathlib import Path, PurePath -from typing import Any, Dict, List, Set +from typing import Any, Dict, List, Optional, Set class ReportLoadError(Exception): @@ 
-67,6 +67,7 @@ class Report: self.deleted_files: Set[PurePath] = set() # Files that should have been deleted by the cleanup but weren't self.not_deleted_files: Set[PurePath] = set() + self.custom: Dict[str, Any] = dict() @staticmethod def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]: @@ -81,6 +82,15 @@ class Report: return result + @staticmethod + def _get_str_dictionary(data: Dict[str, Any], key: str) -> Dict[str, Any]: + result: Dict[str, Any] = data.get(key, {}) + + if not isinstance(result, dict): + raise ReportLoadError(f"Incorrect format: {key!r} is not a dictionary") + + return result + @classmethod def load(cls, path: Path) -> "Report": """ @@ -108,6 +118,7 @@ class Report: self.delete_file(PurePath(elem)) for elem in self._get_list_of_strs(data, "not_deleted"): self.not_delete_file(PurePath(elem)) + self.custom = self._get_str_dictionary(data, "custom") return self @@ -124,6 +135,7 @@ class Report: "changed": [str(path) for path in sorted(self.changed_files)], "deleted": [str(path) for path in sorted(self.deleted_files)], "not_deleted": [str(path) for path in sorted(self.not_deleted_files)], + "custom": self.custom } with open(path, "w") as f: @@ -190,3 +202,15 @@ class Report: """ self.not_deleted_files.add(path) + + def add_custom_value(self, key: str, value: Any) -> None: + """ + Adds a custom value under the passed key, overwriting any existing + """ + self.custom[key] = value + + def get_custom_value(self, key: str) -> Optional[Any]: + """ + Retrieves a custom value for the given key. 
+ """ + return self.custom.get(key) From f9a3f9b9f2702796f64d11d5d649261ea76a908d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 30 Oct 2021 18:12:29 +0200 Subject: [PATCH 329/524] Handle multi-stream videos --- PFERD/crawl/ilias/kit_ilias_html.py | 18 ++++- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 86 +++++++++++++++++++--- 2 files changed, 92 insertions(+), 12 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 7e91926..78ae084 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -133,9 +133,21 @@ class IliasPage: # parse it json_object = json.loads(json_str) - # and fetch the video url! - video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"] - return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + streams = [stream for stream in json_object["streams"] if stream["type"] == "video"] + + # and just fetch the lone video url! + if len(streams) == 1: + video_url = streams[0]["sources"]["mp4"][0]["src"] + return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + + log.explain(f"Found multiple videos for stream at {self._source_name}") + items = [] + for stream in sorted(streams, key=lambda stream: stream["content"]): + full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4" + video_url = stream["sources"]["mp4"][0]["src"] + items.append(IliasPageElement(IliasElementType.VIDEO, video_url, full_name)) + + return items def _find_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index cca6987..f483754 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -1,7 +1,7 @@ import asyncio import re from pathlib import PurePath -from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, 
TypeVar, Union +from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union, cast import aiohttp from aiohttp import hdrs @@ -439,22 +439,90 @@ instance's greatest bottleneck. element: IliasPageElement, element_path: PurePath, ) -> Optional[Awaitable[None]]: - # Videos will NOT be redownloaded - their content doesn't really change and they are chunky - maybe_dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) - if not maybe_dl: + # Copy old mapping as it is likely still relevant + if self.prev_report: + self.report.add_custom_value( + str(element_path), + self.prev_report.get_custom_value(str(element_path)) + ) + + # A video might contain other videos, so let's "crawl" the video first + # to ensure rate limits apply. This must be a download as *this token* + # is re-used if the video consists of a single stream. In that case the + # file name is used and *not* the stream name the ilias html parser reported + # to ensure backwards compatibility. + maybe_dl = await self.download(element_path, redownload=Redownload.ALWAYS) + + # If we do not want to crawl it (user filter) or we have every file + # from the cached mapping already, we can ignore this and bail + if not maybe_dl or self._all_videos_locally_present(element_path): + # Mark all existing videos as known so they do not get deleted + # during cleanup. We "downloaded" them, just without actually making + # a network request as we assumed they did not change. 
+ for video in self._previous_contained_videos(element_path): + await self.download(video) + return None - return self._download_video(element, maybe_dl) + return self._download_video(element_path, element, maybe_dl) + + def _previous_contained_videos(self, video_path: PurePath) -> List[PurePath]: + if not self.prev_report: + return [] + custom_value = self.prev_report.get_custom_value(str(video_path)) + if not custom_value: + return [] + names = cast(List[str], custom_value) + folder = video_path.parent + return [PurePath(folder, name) for name in names] + + def _all_videos_locally_present(self, video_path: PurePath) -> bool: + if contained_videos := self._previous_contained_videos(video_path): + log.explain_topic(f"Checking local cache for video {video_path.name}") + all_found_locally = True + for video in contained_videos: + all_found_locally = all_found_locally and self._output_dir.resolve(video).exists() + if all_found_locally: + log.explain("Found all videos locally, skipping enumeration request") + return True + log.explain("Missing at least one video, continuing with requests!") + return False @_iorepeat(3, "downloading video") - async def _download_video(self, element: IliasPageElement, dl: DownloadToken) -> None: + async def _download_video( + self, + original_path: PurePath, + element: IliasPageElement, + dl: DownloadToken + ) -> None: + stream_elements: List[IliasPageElement] = [] async with dl as (bar, sink): page = IliasPage(await self._get_page(element.url), element.url, element) - real_element = page.get_child_elements()[0] + stream_elements = page.get_child_elements() - log.explain(f"Streaming video from real url {real_element.url}") + if len(stream_elements) > 1: + log.explain(f"Found multiple video streams for {element.name}") + else: + log.explain(f"Using single video mode for {element.name}") + stream_element = stream_elements[0] + await self._stream_from_url(stream_element.url, sink, bar, is_video=True) + 
self.report.add_custom_value(str(original_path), [original_path.name]) + return - await self._stream_from_url(real_element.url, sink, bar, is_video=True) + contained_video_paths: List[str] = [] + + for stream_element in stream_elements: + contained_video_paths.append(stream_element.name) + video_path = original_path.parent / stream_element.name + + maybe_dl = await self.download(video_path, mtime=element.mtime, redownload=Redownload.NEVER) + if not maybe_dl: + continue + async with maybe_dl as (bar, sink): + log.explain(f"Streaming video from real url {stream_element.url}") + await self._stream_from_url(stream_element.url, sink, bar, is_video=True) + + self.report.add_custom_value(str(original_path), contained_video_paths) async def _handle_file( self, From e42ab83d32ce852eb26e1a21982399e2988e769a Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 25 Oct 2021 11:07:25 +0200 Subject: [PATCH 330/524] Add support for ILIAS cards --- PFERD/crawl/ilias/kit_ilias_html.py | 94 ++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 78ae084..d8c347d 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -368,6 +368,8 @@ class IliasPage: log.explain(f"Found {element_name!r}") result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) + result += self._find_cards() + return result def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]: @@ -450,6 +452,90 @@ class IliasPage: log.explain(f"Found file {full_path!r}") return IliasPageElement(IliasElementType.FILE, url, full_path, modification_date) + def _find_cards(self) -> List[IliasPageElement]: + result: List[IliasPageElement] = [] + + card_titles: List[Tag] = self._soup.select(".card-title a") + + for title in card_titles: + url = self._abs_url_from_link(title) + name = _sanitize_path_name(title.getText().strip()) + type = 
self._find_type_from_card(title) + + if not type: + _unexpected_html_warning() + log.warn_contd(f"Could not extract type for {title}") + continue + + result.append(IliasPageElement(type, url, name)) + + card_button_tiles: List[Tag] = self._soup.select(".card-title button") + + for button in card_button_tiles: + regex = re.compile(button["id"] + r".*window.open\(['\"](.+?)['\"]") + res = regex.search(str(self._soup)) + if not res: + _unexpected_html_warning() + log.warn_contd(f"Could not find click handler target for {button}") + continue + url = self._abs_url_from_relative(res.group(1)) + name = _sanitize_path_name(button.getText().strip()) + type = self._find_type_from_card(button) + caption_parent = button.findParent( + "div", + attrs={"class": lambda x: x and "caption" in x}, + ) + description = caption_parent.find_next_sibling("div").getText().strip() + + if not type: + _unexpected_html_warning() + log.warn_contd(f"Could not extract type for {button}") + continue + + result.append(IliasPageElement(type, url, name, description=description)) + + return result + + def _find_type_from_card(self, card_title: Tag) -> Optional[IliasElementType]: + def is_card_root(element: Tag) -> bool: + return "il-card" in element["class"] and "thumbnail" in element["class"] + + card_root: Optional[Tag] = None + + # We look for the card root + for parent in card_title.parents: + if is_card_root(parent): + card_root = parent + break + + if card_root is None: + _unexpected_html_warning() + log.warn_contd(f"Tried to figure out element type, but did not find an icon for {card_title}") + return None + + icon: Tag = card_root.select_one(".il-card-repository-head .icon") + + if "opencast" in icon["class"]: + return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED + if "exc" in icon["class"]: + return IliasElementType.EXERCISE + if "webr" in icon["class"]: + return IliasElementType.LINK + if "book" in icon["class"]: + return IliasElementType.BOOKING + if "frm" in icon["class"]: + return 
IliasElementType.FORUM + if "sess" in icon["class"]: + return IliasElementType.MEETING + if "tst" in icon["class"]: + return IliasElementType.TEST + if "fold" in icon["class"]: + return IliasElementType.FOLDER + + _unexpected_html_warning() + log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") + return None + @staticmethod def _find_type_from_link( element_name: str, @@ -550,7 +636,13 @@ class IliasPage: """ Create an absolute url from an tag. """ - return urljoin(self._page_url, link_tag.get("href")) + return self._abs_url_from_relative(link_tag.get("href")) + + def _abs_url_from_relative(self, relative_url: str) -> str: + """ + Create an absolute url from a relative URL. + """ + return urljoin(self._page_url, relative_url) def _unexpected_html_warning() -> None: From ad3f4955f72a6bfbdcbaaae24b821f078e6e44d5 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 30 Oct 2021 18:14:39 +0200 Subject: [PATCH 331/524] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a90c978..faa2507 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ ambiguous situations. ### Added - A KIT IPD crawler +- Support for ILIAS cards +- Support for multi-stream videos ### Removed - [Interpolation](https://docs.python.org/3/library/configparser.html#interpolation-of-values) in config file From d6f38a61e16fa95d8a2365abc1cfd70f35ee0289 Mon Sep 17 00:00:00 2001 From: Toorero <22551563+Toorero@users.noreply.github.com> Date: Mon, 25 Oct 2021 21:34:51 +0200 Subject: [PATCH 332/524] Fixed minor spelling mistakes --- CONFIG.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 4d2ec33..8ccaa50 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -36,7 +36,7 @@ Sections whose names start with `crawl:` are used to configure crawlers. The rest of the section name specifies the name of the crawler. 
A crawler synchronizes a remote resource to a local directory. There are -different types of crawlers for different kinds of resources, e. g. ILIAS +different types of crawlers for different kinds of resources, e.g. ILIAS courses or lecture websites. Each crawl section represents an instance of a specific type of crawler. The @@ -53,7 +53,7 @@ common to all crawlers: crawler can still be executed manually using the `--crawler` or `-C` flags. (Default: `no`) - `output_dir`: The directory the crawler synchronizes files to. A crawler will - never place any files outside of this directory. (Default: the crawler's name) + never place any files outside this directory. (Default: the crawler's name) - `redownload`: When to download a file that is already present locally. (Default: `never-smart`) - `never`: If a file is present locally, it is not downloaded again. @@ -138,7 +138,7 @@ crawler simulate a slower, network-based crawler. ### The `kit-ipd` crawler -This crawler crals a KIT ipd page by url. The root page can be crawled from +This crawler crawls a KIT ipd page by url. The root page can be crawled from outside the KIT network so you will be informed about any new/deleted files, but downloading files requires you to be within. Adding a show delay between requests is likely a good idea. @@ -312,11 +312,11 @@ matches `SOURCE`, the output path is created using `TARGET` as template. `SOURCE` is automatically anchored. `TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can -be referred to as `{g}` (e. g. `{g3}`). `{g0}` refers to the original path. +be referred to as `{g}` (e.g. `{g3}`). `{g0}` refers to the original path. If capturing group *n*'s contents are a valid integer, the integer value is -available as `{i}` (e. g. `{i3}`). If capturing group *n*'s contents are a -valid float, the float value is available as `{f}` (e. g. `{f3}`). If a -capturing group is not present (e. g. when matching the string `cd` with the +available as `{i}` (e.g. 
`{i3}`). If capturing group *n*'s contents are a +valid float, the float value is available as `{f}` (e.g. `{f3}`). If a +capturing group is not present (e.g. when matching the string `cd` with the regex `(ab)?cd`), the corresponding variables are not defined. Python's format string syntax has rich options for formatting its arguments. For From 6b2a65757373193a5ecb8d2263ae7d758178014d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julius=20R=C3=BCberg?= <22551563+Toorero@users.noreply.github.com> Date: Mon, 1 Nov 2021 10:09:50 +0100 Subject: [PATCH 333/524] Fix IPD crawler for different subpages (#42) This patch reworks the IPD crawler to support subpages which do not use "/intern" for links and fetches the folder names from table headings. --- PFERD/crawl/kit_ipd_crawler.py | 50 ++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 4d4addd..1ed5ffe 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -1,7 +1,9 @@ import os +import re from dataclasses import dataclass from pathlib import PurePath -from typing import List, Set, Union +from re import Pattern +from typing import List, Set, Union, AnyStr, Optional from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag @@ -25,6 +27,10 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target + def link_regex(self) -> Pattern[AnyStr]: + regex = self.s.get("link_regex", "^.*/[^/]*\.(?:pdf|zip|c|java)$") + return re.compile(regex) + @dataclass class KitIpdFile: @@ -48,6 +54,7 @@ class KitIpdCrawler(HttpCrawler): ): super().__init__(name, section, config) self._url = section.target() + self._file_regex = section.link_regex() async def _run(self) -> None: maybe_cl = await self.crawl(PurePath(".")) @@ -88,19 +95,28 @@ class KitIpdCrawler(HttpCrawler): folder_tags: Set[Tag] = set() for element in elements: - enclosing_data: Tag = element.findParent(name="td") - label: 
Tag = enclosing_data.findPreviousSibling(name="td") - folder_tags.add(label) + folder_label = self._fetch_folder_label(element) + if folder_label is None: + folder_tags.add(page) + else: + folder_tags.add(folder_label) return folder_tags def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: - name = folder_tag.getText().strip() files: List[KitIpdFile] = [] + # if files have found outside a regular table + if not folder_tag.name.startswith("h"): + name = "." + root_links = filter(lambda f: self._fetch_folder_label(f) is None, self._find_file_links(folder_tag)) + for link in root_links: + files.append(self._extract_file(link)) - container: Tag = folder_tag.findNextSibling(name="td") - for link in self._find_file_links(container): - files.append(self._extract_file(link)) + else: + name = folder_tag.getText().strip() + container: Tag = folder_tag.findNextSibling(name="table") + for link in self._find_file_links(container): + files.append(self._extract_file(link)) log.explain_topic(f"Found folder {name!r}") for file in files: @@ -108,14 +124,24 @@ class KitIpdCrawler(HttpCrawler): return KitIpdFolder(name, files) + @staticmethod + def _fetch_folder_label(file_link: Tag) -> Optional[Tag]: + enclosing_table: Tag = file_link.findParent(name="table") + if enclosing_table is None: + return None + label: Tag = enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) + if label is None: + return None + else: + return label + def _extract_file(self, link: Tag) -> KitIpdFile: - name = link.getText().strip() url = self._abs_url_from_link(link) - _, extension = os.path.splitext(url) - return KitIpdFile(name + extension, url) + name = os.path.basename(url) + return KitIpdFile(name, url) def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]: - return tag.findAll(name="a", attrs={"href": lambda x: x and "intern" in x}) + return tag.findAll(name="a", attrs={"href": self._file_regex}) def _abs_url_from_link(self, link_tag: Tag) -> str: return 
urljoin(self._url, link_tag.get("href")) From 88afe64a928fce7108264f386298edbbe60117f5 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 1 Nov 2021 10:43:13 +0100 Subject: [PATCH 334/524] Refactor IPD crawler a bit --- PFERD/cli/command_kit_ipd.py | 2 +- PFERD/crawl/kit_ipd_crawler.py | 75 +++++++++++++++++----------------- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/PFERD/cli/command_kit_ipd.py b/PFERD/cli/command_kit_ipd.py index 480cc9b..c4c593f 100644 --- a/PFERD/cli/command_kit_ipd.py +++ b/PFERD/cli/command_kit_ipd.py @@ -35,7 +35,7 @@ def load( log.explain("Creating config for command 'kit-ipd'") parser["crawl:kit-ipd"] = {} - section = parser["crawl:ipd"] + section = parser["crawl:kit-ipd"] load_crawler(args, section) section["type"] = "kit-ipd" diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 1ed5ffe..76145b4 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from pathlib import PurePath from re import Pattern -from typing import List, Set, Union, AnyStr, Optional +from typing import Awaitable, List, Optional, Set, Union from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag @@ -27,12 +27,12 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target - def link_regex(self) -> Pattern[AnyStr]: - regex = self.s.get("link_regex", "^.*/[^/]*\.(?:pdf|zip|c|java)$") + def link_regex(self) -> Pattern[str]: + regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|java)$") return re.compile(regex) -@dataclass +@dataclass(unsafe_hash=True) class KitIpdFile: name: str url: str @@ -43,6 +43,14 @@ class KitIpdFolder: name: str files: List[KitIpdFile] + def explain(self) -> None: + log.explain_topic(f"Folder {self.name!r}") + for file in self.files: + log.explain(f"File {file.name!r}") + + def __hash__(self) -> int: + return self.name.__hash__() + class KitIpdCrawler(HttpCrawler): @@ -61,13 +69,15 @@ 
class KitIpdCrawler(HttpCrawler): if not maybe_cl: return - folders: List[KitIpdFolder] = [] + tasks: List[Awaitable[None]] = [] async with maybe_cl: - folder_tags = await self._fetch_folder_tags() - folders = [self._extract_folder(tag) for tag in folder_tags] - - tasks = [self._crawl_folder(folder) for folder in folders] + for item in await self._fetch_items(): + if isinstance(item, KitIpdFolder): + tasks.append(self._crawl_folder(item)) + else: + # Orphan files are placed in the root folder + tasks.append(self._download_file(PurePath("."), item)) await self.gather(tasks) @@ -89,51 +99,42 @@ class KitIpdCrawler(HttpCrawler): async with maybe_dl as (bar, sink): await self._stream_from_url(file.url, sink, bar) - async def _fetch_folder_tags(self) -> Set[Tag]: + async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]: page = await self.get_page() elements: List[Tag] = self._find_file_links(page) - folder_tags: Set[Tag] = set() + items: Set[Union[KitIpdFile, KitIpdFolder]] = set() for element in elements: - folder_label = self._fetch_folder_label(element) - if folder_label is None: - folder_tags.add(page) + folder_label = self._find_folder_label(element) + if folder_label: + folder = self._extract_folder(folder_label) + if folder not in items: + items.add(folder) + folder.explain() else: - folder_tags.add(folder_label) + file = self._extract_file(element) + items.add(file) + log.explain_topic(f"Orphan file {file.name!r}") + log.explain("Attributing it to root folder") - return folder_tags + return items def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: files: List[KitIpdFile] = [] - # if files have found outside a regular table - if not folder_tag.name.startswith("h"): - name = "." 
- root_links = filter(lambda f: self._fetch_folder_label(f) is None, self._find_file_links(folder_tag)) - for link in root_links: - files.append(self._extract_file(link)) + name = folder_tag.getText().strip() - else: - name = folder_tag.getText().strip() - container: Tag = folder_tag.findNextSibling(name="table") - for link in self._find_file_links(container): - files.append(self._extract_file(link)) - - log.explain_topic(f"Found folder {name!r}") - for file in files: - log.explain(f"Found file {file.name!r}") + container: Tag = folder_tag.findNextSibling(name="table") + for link in self._find_file_links(container): + files.append(self._extract_file(link)) return KitIpdFolder(name, files) @staticmethod - def _fetch_folder_label(file_link: Tag) -> Optional[Tag]: + def _find_folder_label(file_link: Tag) -> Optional[Tag]: enclosing_table: Tag = file_link.findParent(name="table") if enclosing_table is None: return None - label: Tag = enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) - if label is None: - return None - else: - return label + return enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) def _extract_file(self, link: Tag) -> KitIpdFile: url = self._abs_url_from_link(link) From 13b8c3d9c6c59ab2714e2670506d89c5a2cb6eb6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 2 Nov 2021 09:30:46 +0100 Subject: [PATCH 335/524] Add regex option to config and CLI parser --- CONFIG.md | 7 ++++++- LICENSE | 3 ++- PFERD/cli/command_kit_ipd.py | 8 ++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 8ccaa50..569780d 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -138,11 +138,16 @@ crawler simulate a slower, network-based crawler. ### The `kit-ipd` crawler -This crawler crawls a KIT ipd page by url. The root page can be crawled from +This crawler crawls a KIT-IPD page by url. 
The root page can be crawled from outside the KIT network so you will be informed about any new/deleted files, but downloading files requires you to be within. Adding a show delay between requests is likely a good idea. +- `target`: URL to a KIT-IPD page +- `link_regex`: A regex that is matched against the `href` part of links. If it + matches, the given link is downloaded as a file. This is used to extract + files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|java)$`) + ### The `kit-ilias-web` crawler This crawler crawls the KIT ILIAS instance. diff --git a/LICENSE b/LICENSE index c096c4a..fe2293f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ -Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim, thelukasprobst +Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, + TheChristophe, Scriptim, thelukasprobst, Toorero Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/cli/command_kit_ipd.py b/PFERD/cli/command_kit_ipd.py index c4c593f..b53e67e 100644 --- a/PFERD/cli/command_kit_ipd.py +++ b/PFERD/cli/command_kit_ipd.py @@ -14,6 +14,12 @@ GROUP = SUBPARSER.add_argument_group( title="kit ipd crawler arguments", description="arguments for the 'kit-ipd' crawler", ) +GROUP.add_argument( + "--link-regex", + type=str, + metavar="REGEX", + help="href-matching regex to identify downloadable files" +) GROUP.add_argument( "target", type=str, @@ -41,6 +47,8 @@ def load( section["type"] = "kit-ipd" section["target"] = str(args.target) section["output_dir"] = str(args.output) + if args.link_regex: + section["link_regex"] = str(args.link_regex) SUBPARSER.set_defaults(command=load) From 6289938d7c772660a5d497ce456168186eb8a6fb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 6 Nov 2021 12:09:51 +0100 Subject: [PATCH 336/524] Do not stop crawling files when encountering a 
CrawlWarning --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index f483754..c3e51ef 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -84,7 +84,7 @@ _VIDEO_ELEMENTS: Set[IliasElementType] = set([ AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) -def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: +def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]: def decorator(f: AWrapped) -> AWrapped: async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]: last_exception: Optional[BaseException] = None @@ -105,7 +105,10 @@ def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: if last_exception: message = f"Error in I/O Operation: {last_exception}" - raise CrawlWarning(message) from last_exception + if failure_is_error: + raise CrawlError(message) from last_exception + else: + raise CrawlWarning(message) from last_exception raise CrawlError("Impossible return in ilias _iorepeat") return wrapper # type: ignore @@ -251,6 +254,7 @@ instance's greatest bottleneck. return None return self._crawl_ilias_page(url, parent, maybe_cl) + @anoncritical async def _crawl_ilias_page( self, url: str, @@ -292,10 +296,12 @@ instance's greatest bottleneck. # And execute them await self.gather(tasks) + # These decorators only apply *to this method* and *NOT* to the returned + # awaitables! + # This method does not await the handlers but returns them instead. + # This ensures one level is handled at a time and name deduplication + # works correctly. @anoncritical - # Shouldn't happen but we also really don't want to let I/O errors bubble up to anoncritical. 
- # If that happens we will be terminated as anoncritical doesn't tream them as non-critical. - @_wrap_io_in_warning("handling ilias element") async def _handle_ilias_element( self, parent_path: PurePath, @@ -363,6 +369,7 @@ instance's greatest bottleneck. return self._download_link(element, link_template_maybe, maybe_dl) + @anoncritical @_iorepeat(3, "resolving link") async def _download_link(self, element: IliasPageElement, link_template: str, dl: DownloadToken) -> None: async with dl as (bar, sink): @@ -409,6 +416,7 @@ instance's greatest bottleneck. return self._download_booking(element, link_template_maybe, maybe_dl) + @anoncritical @_iorepeat(3, "resolving booking") async def _download_booking( self, @@ -488,6 +496,7 @@ instance's greatest bottleneck. log.explain("Missing at least one video, continuing with requests!") return False + @anoncritical @_iorepeat(3, "downloading video") async def _download_video( self, @@ -534,6 +543,7 @@ instance's greatest bottleneck. return None return self._download_file(element, maybe_dl) + @anoncritical @_iorepeat(3, "downloading file") async def _download_file(self, element: IliasPageElement, dl: DownloadToken) -> None: assert dl # The function is only reached when dl is not None @@ -589,7 +599,7 @@ instance's greatest bottleneck. # We repeat this as the login method in shibboleth doesn't handle I/O errors. # Shibboleth is quite reliable as well, the repeat is likely not critical here. 
- @_iorepeat(3, "Login") + @_iorepeat(3, "Login", failure_is_error=True) async def _authenticate(self) -> None: await self._shibboleth_login.login(self.session) From 90cb6e989b492bbfe2f242c77aad616b86637052 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 6 Nov 2021 23:20:24 +0100 Subject: [PATCH 337/524] Do not download single videos if cache does not exist --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c3e51ef..c6115f4 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -514,7 +514,12 @@ instance's greatest bottleneck. else: log.explain(f"Using single video mode for {element.name}") stream_element = stream_elements[0] - await self._stream_from_url(stream_element.url, sink, bar, is_video=True) + + # We do not have a local cache yet + if self._output_dir.resolve(original_path).exists(): + log.explain(f"Video for {element.name} existed locally") + else: + await self._stream_from_url(stream_element.url, sink, bar, is_video=True) self.report.add_custom_value(str(original_path), [original_path.name]) return From a82a0b19c2193c6817ae07361889de8fd392868e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 7 Nov 2021 21:40:22 +0100 Subject: [PATCH 338/524] Collect crawler warnings/errors and include them in the report --- PFERD/crawl/crawler.py | 8 ++++++-- PFERD/pferd.py | 8 ++++++++ PFERD/report.py | 24 +++++++++++++++++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index c492ee9..53f43e9 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -47,10 +47,12 @@ def noncritical(f: Wrapped) -> Wrapped: try: f(*args, **kwargs) except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: + crawler.report.add_warning(str(e)) log.warn(str(e)) 
crawler.error_free = False - except: # noqa: E722 do not use bare 'except' + except Exception as e: crawler.error_free = False + crawler.report.add_error(str(e)) raise return wrapper # type: ignore @@ -83,8 +85,10 @@ def anoncritical(f: AWrapped) -> AWrapped: except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: log.warn(str(e)) crawler.error_free = False - except: # noqa: E722 do not use bare 'except' + crawler.report.add_warning(str(e)) + except Exception as e: crawler.error_free = False + crawler.report.add_error(str(e)) raise return None diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 726ed45..079053b 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -182,5 +182,13 @@ class Pferd: something_changed = True log.report(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") + for warning in crawler.report.encountered_warnings: + something_changed = True + log.report(f" [bold bright_red]Warning[/] {warning}") + + for error in crawler.report.encountered_errors: + something_changed = True + log.report(f" [bold bright_red]Error[/] {error}") + if not something_changed: log.report(" Nothing changed") diff --git a/PFERD/report.py b/PFERD/report.py index 99a4661..0e0c789 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -67,8 +67,14 @@ class Report: self.deleted_files: Set[PurePath] = set() # Files that should have been deleted by the cleanup but weren't self.not_deleted_files: Set[PurePath] = set() + + # Custom crawler-specific data self.custom: Dict[str, Any] = dict() + # Encountered errors and warnings + self.encountered_warnings: List[str] = [] + self.encountered_errors: List[str] = [] + @staticmethod def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]: result: Any = data.get(key, []) @@ -119,6 +125,8 @@ class Report: for elem in self._get_list_of_strs(data, "not_deleted"): self.not_delete_file(PurePath(elem)) self.custom = self._get_str_dictionary(data, "custom") + self.encountered_errors = 
self._get_list_of_strs(data, "encountered_errors") + self.encountered_warnings = self._get_list_of_strs(data, "encountered_warnings") return self @@ -135,7 +143,9 @@ class Report: "changed": [str(path) for path in sorted(self.changed_files)], "deleted": [str(path) for path in sorted(self.deleted_files)], "not_deleted": [str(path) for path in sorted(self.not_deleted_files)], - "custom": self.custom + "custom": self.custom, + "encountered_warnings": self.encountered_warnings, + "encountered_errors": self.encountered_errors, } with open(path, "w") as f: @@ -214,3 +224,15 @@ class Report: Retrieves a custom value for the given key. """ return self.custom.get(key) + + def add_error(self, error: str) -> None: + """ + Adds an error to this report's error list. + """ + self.encountered_errors.append(error) + + def add_warning(self, warning: str) -> None: + """ + Adds a warning to this report's warning list. + """ + self.encountered_warnings.append(warning) From eac2e341612461987d37314110c3f4c7640499f3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 7 Jan 2022 23:32:31 +0100 Subject: [PATCH 339/524] Fix is_logged_in for ILIAS 7 --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c6115f4..c5b2953 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -611,9 +611,10 @@ instance's greatest bottleneck. @staticmethod def _is_logged_in(soup: BeautifulSoup) -> bool: # Normal ILIAS pages - userlog = soup.find("li", {"id": "userlog"}) - if userlog is not None: - return True + mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") + if mainbar is not None: + login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) + return not login_button # Video listing embeds do not have complete ILIAS html. 
Try to match them by # their video listing table video_table = soup.find( From a99356f2a2d403ffb40f47bb159707d73e55a0e3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 00:27:34 +0100 Subject: [PATCH 340/524] Fix video stream extraction --- PFERD/crawl/ilias/kit_ilias_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d8c347d..ece88c5 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -133,7 +133,7 @@ class IliasPage: # parse it json_object = json.loads(json_str) - streams = [stream for stream in json_object["streams"] if stream["type"] == "video"] + streams = [stream for stream in json_object["streams"]] # and just fetch the lone video url! if len(streams) == 1: From 462d993fbc00602b4952d675fa4c77e5372c27fa Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 00:27:48 +0100 Subject: [PATCH 341/524] Fix local video path cache (hopefully) --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c5b2953..5d44566 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -489,7 +489,10 @@ instance's greatest bottleneck. 
log.explain_topic(f"Checking local cache for video {video_path.name}") all_found_locally = True for video in contained_videos: - all_found_locally = all_found_locally and self._output_dir.resolve(video).exists() + transformed_path = self._transformer.transform(video) + if transformed_path: + exists_locally = self._output_dir.resolve(transformed_path).exists() + all_found_locally = all_found_locally and exists_locally if all_found_locally: log.explain("Found all videos locally, skipping enumeration request") return True @@ -515,8 +518,12 @@ instance's greatest bottleneck. log.explain(f"Using single video mode for {element.name}") stream_element = stream_elements[0] + transformed_path = self._transformer.transform(original_path) + if not transformed_path: + raise CrawlError(f"Download returned a path but transform did not for {original_path}") + # We do not have a local cache yet - if self._output_dir.resolve(original_path).exists(): + if self._output_dir.resolve(transformed_path).exists(): log.explain(f"Video for {element.name} existed locally") else: await self._stream_from_url(stream_element.url, sink, bar, is_video=True) @@ -526,8 +533,8 @@ instance's greatest bottleneck. 
contained_video_paths: List[str] = [] for stream_element in stream_elements: - contained_video_paths.append(stream_element.name) video_path = original_path.parent / stream_element.name + contained_video_paths.append(str(video_path)) maybe_dl = await self.download(video_path, mtime=element.mtime, redownload=Redownload.NEVER) if not maybe_dl: From 6f3cfd43969cdac557c4f2d38bd2b4f0ffd40721 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 16:58:15 +0100 Subject: [PATCH 342/524] Fix personal desktop crawling --- PFERD/crawl/ilias/kit_ilias_html.py | 61 ++++++++++++++++++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 9 +++- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index ece88c5..9c8ab95 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -39,7 +39,12 @@ class IliasPageElement: description: Optional[str] = None def id(self) -> str: - regexes = [r"eid=(?P[0-9a-z\-]+)", r"file_(?P\d+)", r"ref_id=(?P\d+)"] + regexes = [ + r"eid=(?P[0-9a-z\-]+)", + r"file_(?P\d+)", + r"ref_id=(?P\d+)", + r"target=[a-z]+_(?P\d+)" + ] for regex in regexes: if match := re.search(regex, self.url): @@ -71,6 +76,9 @@ class IliasPage: if self._is_exercise_file(): log.explain("Page is an exercise, searching for elements") return self._find_exercise_entries() + if self._is_personal_desktop(): + log.explain("Page is the personal desktop") + return self._find_personal_desktop_entries() log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() @@ -115,6 +123,9 @@ class IliasPage: return False + def _is_personal_desktop(self) -> bool: + return self._soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}) + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. 
The actual video stream url is nowhere @@ -149,6 +160,26 @@ class IliasPage: return items + def _find_personal_desktop_entries(self) -> List[IliasPageElement]: + items: List[IliasPageElement] = [] + + titles: List[Tag] = self._soup.select(".il-item-title") + for title in titles: + link = title.find("a") + name = _sanitize_path_name(link.text.strip()) + url = self._abs_url_from_link(link) + + type = self._find_type_from_link(name, link, url) + if not type: + _unexpected_html_warning() + log.warn_contd(f"Could not extract type for {link}") + continue + + log.explain(f"Found {name!r}") + items.append(IliasPageElement(type, url, name)) + + return items + def _find_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages # 1. The initial dummy page without any videos. This page contains the link to the listing @@ -551,9 +582,30 @@ class IliasPage: if "target=file_" in parsed_url.query: return IliasElementType.FILE + if "target=grp_" in parsed_url.query: + return IliasElementType.FOLDER + + if "target=crs_" in parsed_url.query: + return IliasElementType.FOLDER + + if "baseClass=ilExerciseHandlerGUI" in parsed_url.query: + return IliasElementType.EXERCISE + + if "baseClass=ilLinkResourceHandlerGUI" in parsed_url.query and "calldirectlink" in parsed_url.query: + return IliasElementType.LINK + + if "cmd=showThreads" in parsed_url.query or "target=frm_" in parsed_url.query: + return IliasElementType.FORUM + + if "cmdClass=ilobjtestgui" in parsed_url.query: + return IliasElementType.TEST + + # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so + # try to guess it from the image. 
+ # Everything with a ref_id can *probably* be opened to reveal nested things # video groups, directories, exercises, etc - if "ref_id=" in parsed_url.query: + if "ref_id=" in parsed_url.query or "goto.php" in parsed_url.path: return IliasPage._find_type_from_folder_like(link_element, url) _unexpected_html_warning() @@ -574,7 +626,7 @@ class IliasPage: # We look for the outer div of our inner link, to find information around it # (mostly the icon) for parent in link_element.parents: - if "ilContainerListItemOuter" in parent["class"]: + if "ilContainerListItemOuter" in parent["class"] or "il-std-item" in parent["class"]: found_parent = parent break @@ -586,6 +638,9 @@ class IliasPage: # Find the small descriptive icon to figure out the type img_tag: Optional[Tag] = found_parent.select_one("img.ilListItemIcon") + if img_tag is None: + img_tag = found_parent.select_one("img.icon") + if img_tag is None: _unexpected_html_warning() log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 5d44566..99d6cf6 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -203,7 +203,9 @@ instance's greatest bottleneck. await self._crawl_url(root_url, expected_id=course_id) async def _crawl_desktop(self) -> None: - await self._crawl_url(self._base_url) + appendix = r"ILIAS\PersonalDesktop\PDMainBarProvider|mm_pd_sel_items" + appendix = appendix.encode("ASCII").hex() + await self._crawl_url(self._base_url + "/gs_content.php?item=" + appendix) async def _crawl_url(self, url: str, expected_id: Optional[int] = None) -> None: maybe_cl = await self.crawl(PurePath(".")) @@ -622,6 +624,11 @@ instance's greatest bottleneck. 
if mainbar is not None: login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) return not login_button + + # Personal Desktop + if soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}): + return True + # Video listing embeds do not have complete ILIAS html. Try to match them by # their video listing table video_table = soup.find( From ced8b9a2d032e7e4956b331d4408cb4b0829c780 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 16:58:30 +0100 Subject: [PATCH 343/524] Fix some accordions --- PFERD/crawl/ilias/kit_ilias_html.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 9c8ab95..0a81222 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -428,7 +428,10 @@ class IliasPage: continue prev: Tag = parent.findPreviousSibling("div") if "ilContainerBlockHeader" in prev.get("class"): - found_titles.append(prev.find("h3").getText().strip()) + if prev.find("h3"): + found_titles.append(prev.find("h3").getText().strip()) + else: + found_titles.append(prev.find("h2").getText().strip()) # And this for real accordions if "il_VAccordionContentDef" in parent.get("class"): From 5f527bc697b58512520f4d8ff93b856ff3a345b1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 17:14:40 +0100 Subject: [PATCH 344/524] Remove Python 3.9 Pattern typehints --- PFERD/crawl/ilias/kit_ilias_html.py | 2 +- PFERD/crawl/kit_ipd_crawler.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 0a81222..78bedbf 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -132,7 +132,7 @@ class IliasPage: # on the page, but defined in a JS object inside a script tag, passed to the player # library. 
# We do the impossible and RegEx the stream JSON object out of the page's HTML source - regex: re.Pattern[str] = re.compile( + regex = re.compile( r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE ) json_match = regex.search(str(self._soup)) diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 76145b4..1a5314b 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -2,8 +2,7 @@ import os import re from dataclasses import dataclass from pathlib import PurePath -from re import Pattern -from typing import Awaitable, List, Optional, Set, Union +from typing import Awaitable, List, Optional, Pattern, Set, Union from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag From e32c1f000fb9abcc47f8dc127b4d674acfa1662c Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 18:05:48 +0100 Subject: [PATCH 345/524] Fix mtime for single streams --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 99d6cf6..c4e70c0 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -461,7 +461,7 @@ instance's greatest bottleneck. # is re-used if the video consists of a single stream. In that case the # file name is used and *not* the stream name the ilias html parser reported # to ensure backwards compatibility. 
- maybe_dl = await self.download(element_path, redownload=Redownload.ALWAYS) + maybe_dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.ALWAYS) # If we do not want to crawl it (user filter) or we have every file # from the cached mapping already, we can ignore this and bail From eb4de8ae0cc37e38e9fa801f729e68d1f71a0bb0 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 18:14:43 +0100 Subject: [PATCH 346/524] Ignore 1970 dates as windows crashes when calling .timestamp() --- PFERD/output_dir.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 0fb9911..e612267 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -231,7 +231,8 @@ class OutputDirectory: stat = local_path.stat() remote_newer = None - if mtime := heuristics.mtime: + if heuristics.mtime and heuristics.mtime.year > 1970: + mtime = heuristics.mtime remote_newer = mtime.timestamp() > stat.st_mtime if remote_newer: log.explain("Remote file seems to be newer") From 43c5453e100aedede844a242721d2990845c2c26 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 19:59:42 +0100 Subject: [PATCH 347/524] Correctly crawl files on desktop The files on the desktop do not include a download link, so we need to rewrite it. 
--- PFERD/crawl/ilias/kit_ilias_html.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 78bedbf..cee0555 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -176,6 +176,11 @@ class IliasPage: continue log.explain(f"Found {name!r}") + + if type == IliasElementType.FILE and "_download" not in url: + url = re.sub(r"(target=file_\d+)", r"\1_download", url) + log.explain("Rewired file URL to include download part") + items.append(IliasPageElement(type, url, name)) return items From 10d9d7452809aafe4f406f894944a078072f16bf Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 20:28:30 +0100 Subject: [PATCH 348/524] Bail out when crawling recursive courses --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c4e70c0..8f78e7a 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -182,6 +182,7 @@ instance's greatest bottleneck. self._link_file_redirect_delay = section.link_redirect_delay() self._links = section.links() self._videos = section.videos() + self._visited_urls: Set[str] = set() async def _run(self) -> None: if isinstance(self._target, int): @@ -309,6 +310,12 @@ instance's greatest bottleneck. parent_path: PurePath, element: IliasPageElement, ) -> Optional[Awaitable[None]]: + if element.url in self._visited_urls: + raise CrawlWarning( + f"Found second path to element {element.name!r} at {element.url!r}. 
Aborting subpath" + ) + self._visited_urls.add(element.url) + element_path = PurePath(parent_path, element.name) if element.type in _VIDEO_ELEMENTS: From d30f25ee9788d3363544ba9779cabf157dba3b98 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 20:28:45 +0100 Subject: [PATCH 349/524] Detect shib login page as login page And do not assume we are logged in... --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 8f78e7a..c3b2342 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -630,7 +630,8 @@ instance's greatest bottleneck. mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") if mainbar is not None: login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) - return not login_button + shib_login = soup.find(id="button_shib_login") + return not login_button and not shib_login # Personal Desktop if soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}): From 4ee919625da8d3d04cbb889e24d05b1c09436fe8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 20:47:35 +0100 Subject: [PATCH 350/524] Add rudimentary support for content pages --- PFERD/crawl/ilias/kit_ilias_html.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index cee0555..754af16 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -77,8 +77,11 @@ class IliasPage: log.explain("Page is an exercise, searching for elements") return self._find_exercise_entries() if self._is_personal_desktop(): - log.explain("Page is the personal desktop") + log.explain("Page is the personal desktop, searching for elements") return 
self._find_personal_desktop_entries() + if self._is_content_page(): + log.explain("Page is a content page, searching for elements") + return self._find_copa_entries() log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() @@ -126,6 +129,12 @@ class IliasPage: def _is_personal_desktop(self) -> bool: return self._soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}) + def _is_content_page(self) -> bool: + link = self._soup.find(id="current_perma_link") + if not link: + return False + return "target=copa_" in link.get("value") + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. The actual video stream url is nowhere @@ -185,6 +194,23 @@ class IliasPage: return items + def _find_copa_entries(self) -> List[IliasPageElement]: + items: List[IliasPageElement] = [] + links: List[Tag] = self._soup.findAll(class_="ilc_flist_a_FileListItemLink") + + for link in links: + url = self._abs_url_from_link(link) + name = _sanitize_path_name(link.getText().strip().replace("\t", "")) + + if "file_id" not in url: + _unexpected_html_warning() + log.warn_contd(f"Found unknown content page item {name!r} with url {url!r}") + continue + + items.append(IliasPageElement(IliasElementType.FILE, url, name)) + + return items + def _find_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages # 1. The initial dummy page without any videos. 
This page contains the link to the listing From 4bf0c972e6e37afc7f9688104082189f5f78d390 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 11:47:59 +0100 Subject: [PATCH 351/524] Update types for rich 11 --- PFERD/logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/logging.py b/PFERD/logging.py index 32e5268..e2d64fc 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -5,7 +5,7 @@ from contextlib import asynccontextmanager, contextmanager # TODO In Python 3.9 and above, ContextManager is deprecated from typing import AsyncIterator, ContextManager, Iterator, List, Optional -from rich.console import Console, RenderGroup +from rich.console import Console, Group from rich.live import Live from rich.markup import escape from rich.panel import Panel @@ -68,7 +68,7 @@ class Log: if self._download_progress.task_ids: elements.append(self._download_progress) - group = RenderGroup(*elements) # type: ignore + group = Group(*elements) # type: ignore self._live.update(group) @contextmanager From e9d2d0503001728f6c1f313982d8843d83405e3d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 11:39:42 +0100 Subject: [PATCH 352/524] Update changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index faa2507..1b392c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,10 +26,16 @@ ambiguous situations. 
- A KIT IPD crawler - Support for ILIAS cards - Support for multi-stream videos +- Support for ILIAS 7 ### Removed - [Interpolation](https://docs.python.org/3/library/configparser.html#interpolation-of-values) in config file +### Fixed +- Crawling of recursive courses +- Crawling files directly placed on the personal desktop +- Ignore timestamps at the unix epoch as they crash on windows + ## 3.2.0 - 2021-08-04 ### Added From e467b38d739347d62cbb122d9f4752abe823b423 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 18:23:00 +0100 Subject: [PATCH 353/524] Only reject 1970 timestamps on windows --- PFERD/output_dir.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index e612267..441717b 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -231,7 +231,9 @@ class OutputDirectory: stat = local_path.stat() remote_newer = None - if heuristics.mtime and heuristics.mtime.year > 1970: + + # Python on Windows crashes when faced with timestamps around the unix epoch + if heuristics.mtime and (os.name != "nt" or heuristics.mtime.year > 1970): mtime = heuristics.mtime remote_newer = mtime.timestamp() > stat.st_mtime if remote_newer: From 33453ede2d63b15bcca2ce541af2299440bfa8ff Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 18:31:42 +0100 Subject: [PATCH 354/524] Update dependency versions in setup.py --- setup.cfg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/setup.cfg b/setup.cfg index 5758282..059798a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,11 +6,11 @@ version = attr: PFERD.version.VERSION packages = find: python_requires = >=3.8 install_requires = - aiohttp>=3.7.4.post0 - beautifulsoup4>=4.9.3 - rich>=10.1.0 - keyring>=23.0.1 - certifi>=2020.12.5 + aiohttp>=3.8.1 + beautifulsoup4>=4.10.0 + rich>=11.0.0 + keyring>=23.5.0 + certifi>=2021.10.8 [options.entry_points] console_scripts = From 9618aae83bf10b8e517c53a53c47d14dd707c707 Mon 
Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 18:32:58 +0100 Subject: [PATCH 355/524] Add content pages to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b392c1..6e4c7e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Added - A KIT IPD crawler - Support for ILIAS cards +- (Rudimentary) support for content pages - Support for multi-stream videos - Support for ILIAS 7 From 0045124a4e2851d4d1d84bc7c2b68c75f49d5375 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 9 Jan 2022 21:09:09 +0100 Subject: [PATCH 356/524] Bump version to 3.3.0 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e4c7e9..132351b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.3.0 - 2022-01-09 + ### Added - A KIT IPD crawler - Support for ILIAS cards diff --git a/PFERD/version.py b/PFERD/version.py index b8efadd..ca58f3a 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.2.0" +VERSION = "3.3.0" From 57ec51e95a238960d1832ba0ad85b2ff6ec1de3b Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 14 Jan 2022 20:15:19 +0100 Subject: [PATCH 357/524] Fix login after shib url parser change --- CHANGELOG.md | 4 +++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 39 +++++++++++++++++++--- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 132351b..41ee3d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,10 @@ ambiguous situations. ## Unreleased +### Fixed +- Shibboleth login fixed. It was broken due to URL parser changes and really + *unfortunate* behaviour by aiohttp. 
+ ## 3.3.0 - 2022-01-09 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c3b2342..c26ce8b 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -4,6 +4,7 @@ from pathlib import PurePath from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union, cast import aiohttp +import yarl from aiohttp import hdrs from bs4 import BeautifulSoup, Tag @@ -674,14 +675,14 @@ class KitShibbolethLogin: # Equivalent: Click on "Mit KIT-Account anmelden" button in # https://ilias.studium.kit.edu/login.php - url = "https://ilias.studium.kit.edu/Shibboleth.sso/Login" + url = "https://ilias.studium.kit.edu/shib_login.php" data = { "sendLogin": "1", "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", - "target": "/shib_login.php", - "home_organization_selection": "Mit KIT-Account anmelden", + "il_target": "", + "home_organization_selection": "Weiter", } - soup: BeautifulSoup = await _post(sess, url, data) + soup: BeautifulSoup = await _shib_post(sess, url, data) # Attempt to login using credentials, if necessary while not self._login_successful(soup): @@ -761,3 +762,33 @@ class KitShibbolethLogin: async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: async with session.post(url, data=data) as response: return soupify(await response.read()) + + +async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: + """ + aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected + by Shibboleth. Thanks a lot. So now we unroll the requests manually, parse location headers and + build encoded URL objects ourselfs... Who thought mangling location header was a good idea?? 
+ """ + async with session.post(url, data=data, allow_redirects=False) as response: + location = response.headers.get("location") + if not location: + raise CrawlWarning(f"Login failed, no location header present at {url}") + correct_url = yarl.URL(location, encoded=True) + + async with session.get(correct_url, allow_redirects=False) as response: + as_yarl = yarl.URL(response.url) + location = response.headers.get("location") + + if not location or not as_yarl.host: + raise CrawlWarning(f"Login failed, no location header present at {correct_url}") + + correct_url = yarl.URL.build( + scheme=as_yarl.scheme, + host=as_yarl.host, + path=location, + encoded=True + ) + + async with session.get(correct_url, allow_redirects=False) as response: + return soupify(await response.read()) From f47e7374d23b71396b511ee7b57f59d46c34e00d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 14 Jan 2022 22:01:45 +0100 Subject: [PATCH 358/524] Use fixed windows path for video cache --- CHANGELOG.md | 4 +++- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 9 +++++++-- PFERD/deduplicator.py | 6 ++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41ee3d5..7f35a90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,8 +23,10 @@ ambiguous situations. ## Unreleased ### Fixed -- Shibboleth login fixed. It was broken due to URL parser changes and really +- Shibboleth login. It was broken due to URL parser changes and really *unfortunate* behaviour by aiohttp. +- local video cache on windows if the path was changed to accomodate windows + file system limitations (e.g. replace `:`) ## 3.3.0 - 2022-01-09 diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c26ce8b..b197b6b 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -499,7 +499,7 @@ instance's greatest bottleneck. 
log.explain_topic(f"Checking local cache for video {video_path.name}") all_found_locally = True for video in contained_videos: - transformed_path = self._transformer.transform(video) + transformed_path = self._to_local_video_path(video) if transformed_path: exists_locally = self._output_dir.resolve(transformed_path).exists() all_found_locally = all_found_locally and exists_locally @@ -509,6 +509,11 @@ instance's greatest bottleneck. log.explain("Missing at least one video, continuing with requests!") return False + def _to_local_video_path(self, path: PurePath) -> Optional[PurePath]: + if transformed := self._transformer.transform(path): + return self._deduplicator.fixup_path(transformed) + return None + @anoncritical @_iorepeat(3, "downloading video") async def _download_video( @@ -528,7 +533,7 @@ instance's greatest bottleneck. log.explain(f"Using single video mode for {element.name}") stream_element = stream_elements[0] - transformed_path = self._transformer.transform(original_path) + transformed_path = self._to_local_video_path(original_path) if not transformed_path: raise CrawlError(f"Download returned a path but transform did not for {original_path}") diff --git a/PFERD/deduplicator.py b/PFERD/deduplicator.py index ef62dcb..7777f28 100644 --- a/PFERD/deduplicator.py +++ b/PFERD/deduplicator.py @@ -56,6 +56,12 @@ class Deduplicator: log.explain(f"Changed path to {fmt_path(new_path)} for windows compatibility") return new_path + def fixup_path(self, path: PurePath) -> PurePath: + """Fixes up the path for windows, if enabled. 
Returns the path unchanged otherwise.""" + if self._windows_paths: + return self._fixup_for_windows(path) + return path + def mark(self, path: PurePath) -> PurePath: if self._windows_paths: path = self._fixup_for_windows(path) From 4f022e2d192552ddef22b169044f2692bc4e1563 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 Jan 2022 15:06:02 +0100 Subject: [PATCH 359/524] Reword changelog --- CHANGELOG.md | 6 ++---- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f35a90..76cf836 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,10 +23,8 @@ ambiguous situations. ## Unreleased ### Fixed -- Shibboleth login. It was broken due to URL parser changes and really - *unfortunate* behaviour by aiohttp. -- local video cache on windows if the path was changed to accomodate windows - file system limitations (e.g. replace `:`) +- ILIAS login +- Local video cache if `windows_paths` is enabled ## 3.3.0 - 2022-01-09 diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index b197b6b..a3e37a9 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -773,7 +773,7 @@ async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> Bea """ aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected by Shibboleth. Thanks a lot. So now we unroll the requests manually, parse location headers and - build encoded URL objects ourselfs... Who thought mangling location header was a good idea?? + build encoded URL objects ourselves... Who thought mangling location header was a good idea?? 
""" async with session.post(url, data=data, allow_redirects=False) as response: location = response.headers.get("location") From 86947e4874f0853444e38de0fac4d2ddab5ae41e Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 Jan 2022 15:11:22 +0100 Subject: [PATCH 360/524] Bump version to 3.3.1 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76cf836..d5f9dc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.3.1 - 2022-01-15 + ### Fixed - ILIAS login - Local video cache if `windows_paths` is enabled diff --git a/PFERD/version.py b/PFERD/version.py index ca58f3a..37e91f3 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.3.0" +VERSION = "3.3.1" From 7872fe5221c4c8b95b59ffe54f879c1c39e736f3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 18 Jan 2022 22:32:43 +0100 Subject: [PATCH 361/524] Fix tables with more columns than expected --- PFERD/crawl/ilias/kit_ilias_html.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 754af16..94b2e4b 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -280,11 +280,22 @@ class IliasPage: def _listed_video_to_element(self, link: Tag) -> IliasPageElement: # The link is part of a table with multiple columns, describing metadata. - # 6th child (1 indexed) is the modification time string - modification_string = link.parent.parent.parent.select_one( - "td.std:nth-child(6)" - ).getText().strip() - modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") + # 6th or 7th child (1 indexed) is the modification time string. 
Try to find it + # by parsing backwards from the end and finding something that looks like a date + modification_time = None + row: Tag = link.parent.parent.parent + column_count = len(row.select("td.std")) + for index in range(column_count, 0, -1): + modification_string = link.parent.parent.parent.select_one( + f"td.std:nth-child({index})" + ).getText().strip() + if re.search(r"\d+\.\d+.\d+ - \d+:\d+", modification_string): + modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") + break + + if modification_time is None: + log.warn(f"Could not determine upload time for {link}") + modification_time = datetime.now() title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip() title += ".mp4" From 86e2e226dcefb98232410cc2289d11a664076adc Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 3 Apr 2022 11:32:38 +0200 Subject: [PATCH 362/524] Notify user when shibboleth presents new entitlements --- CHANGELOG.md | 2 ++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d5f9dc6..4e11224 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ ambiguous situations. ### Fixed - ILIAS login - Local video cache if `windows_paths` is enabled +- Report when Shibboleth reviews entitlements +- Support for video listings with more columns ## 3.3.0 - 2022-01-09 diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index a3e37a9..2a5fc87 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -710,6 +710,12 @@ class KitShibbolethLogin: } soup = await _post(sess, url, data) + if soup.find(id="attributeRelease"): + raise CrawlError( + "ILIAS Shibboleth entitlements changed! 
" + "Please log in once in your browser and review them" + ) + if self._tfa_required(soup): soup = await self._authenticate_tfa(sess, soup) From da72863b471c048768a0d8234ba02298b1f9e4c1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 3 Apr 2022 13:19:08 +0200 Subject: [PATCH 363/524] Placate newer mypy --- PFERD/logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/logging.py b/PFERD/logging.py index e2d64fc..e833716 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -68,7 +68,7 @@ class Log: if self._download_progress.task_ids: elements.append(self._download_progress) - group = Group(*elements) # type: ignore + group = Group(*elements) self._live.update(group) @contextmanager From a2831fbea2e8758686677c44645fdd6f3cbc40fa Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 13:55:24 +0200 Subject: [PATCH 364/524] Fix shib authentication Authentication failed previously if the shib session was still valid. If Shibboleth gets a request and the session is still valid, it directly responds without a second redirect. 
--- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 2a5fc87..571e4d7 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -784,15 +784,19 @@ async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> Bea async with session.post(url, data=data, allow_redirects=False) as response: location = response.headers.get("location") if not location: - raise CrawlWarning(f"Login failed, no location header present at {url}") + raise CrawlWarning(f"Login failed (1), no location header present at {url}") correct_url = yarl.URL(location, encoded=True) async with session.get(correct_url, allow_redirects=False) as response: - as_yarl = yarl.URL(response.url) location = response.headers.get("location") + # If shib still still has a valid session, it will directly respond to the request + if location is None: + return soupify(await response.read()) + as_yarl = yarl.URL(response.url) + # Probably not needed anymore, but might catch a few weird situations with a nicer message if not location or not as_yarl.host: - raise CrawlWarning(f"Login failed, no location header present at {correct_url}") + raise CrawlWarning(f"Login failed (2), no location header present at {correct_url}") correct_url = yarl.URL.build( scheme=as_yarl.scheme, From f17b9b68f4cdc397b029361260d35aad7e778308 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 14:01:40 +0200 Subject: [PATCH 365/524] Add shibboleth authentication fix to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e11224..b3da789 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ ambiguous situations. 
- Local video cache if `windows_paths` is enabled - Report when Shibboleth reviews entitlements - Support for video listings with more columns +- Authentication when the shib session is still valid ## 3.3.0 - 2022-01-09 From 07a21f80a63dfd4f47dae4dadc8e515334a9891d Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 21:15:33 +0200 Subject: [PATCH 366/524] Link to unofficial packages --- CHANGELOG.md | 3 +++ README.md | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3da789..c64b69a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Added +- Links to unofficial packages and repology in the readme + ## 3.3.1 - 2022-01-15 ### Fixed diff --git a/README.md b/README.md index 836147f..b8b2551 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,14 @@ $ pip install --upgrade git+https://github.com/Garmelon/PFERD@latest The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. +### With package managers + +Unofficial packages are available for: +- [AUR](https://aur.archlinux.org/packages/pferd) +- [nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/pferd/default.nix) + +See also PFERD's [repology page](https://repology.org/project/pferd/versions). + ## Basic usage PFERD can be run directly from the command line with no config file. Run `pferd From ba3d299c05bae299a3da5c378e9c5f311e78f62f Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 21:23:55 +0200 Subject: [PATCH 367/524] Fix changelog --- CHANGELOG.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c64b69a..c5480f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,16 +23,18 @@ ambiguous situations. 
## Unreleased ### Added +- Message when Shibboleth entitlements need to be manually reviewed +- Support for video listings with more columns - Links to unofficial packages and repology in the readme +### Fixed +- Crash during authentication when the Shibboleth session is still valid + ## 3.3.1 - 2022-01-15 ### Fixed - ILIAS login - Local video cache if `windows_paths` is enabled -- Report when Shibboleth reviews entitlements -- Support for video listings with more columns -- Authentication when the shib session is still valid ## 3.3.0 - 2022-01-09 From a99ddaa0cc28e04edfc95d541f0b1f6ca885965c Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 21:47:51 +0200 Subject: [PATCH 368/524] Read and write config in UTF-8 --- PFERD/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PFERD/config.py b/PFERD/config.py index 0ea7abc..5635573 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -120,7 +120,7 @@ class Config: # Using config.read_file instead of config.read because config.read # would just ignore a missing file and carry on. 
try: - with open(path) as f: + with open(path, encoding="utf-8") as f: parser.read_file(f, source=str(path)) except FileNotFoundError: raise ConfigLoadError(path, "File does not exist") @@ -154,12 +154,12 @@ class Config: try: # x = open for exclusive creation, failing if the file already # exists - with open(path, "x") as f: + with open(path, "x", encoding="utf-8") as f: self._parser.write(f) except FileExistsError: print("That file already exists.") if asyncio.run(prompt_yes_no("Overwrite it?", default=False)): - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: self._parser.write(f) else: raise ConfigDumpError(path, "File already exists") From a709280cbf0bf5dbb62507f9829647862ef5f6bc Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 21:48:09 +0200 Subject: [PATCH 369/524] Try to detect unsupported config file encoding The encoding detection is quite rudimentary, but should detect the default windows encoding in many cases. --- PFERD/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PFERD/config.py b/PFERD/config.py index 5635573..8f7e682 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -128,6 +128,8 @@ class Config: raise ConfigLoadError(path, "That's a directory, not a file") except PermissionError: raise ConfigLoadError(path, "Insufficient permissions") + except UnicodeDecodeError: + raise ConfigLoadError(path, "File is not encoded using UTF-8") def dump(self, path: Optional[Path] = None) -> None: """ From 00db34821825a719712f6bc25420bdfaed9bda11 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 21:53:29 +0200 Subject: [PATCH 370/524] Update changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5480f2..e70d328 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,9 +24,12 @@ ambiguous situations. 
### Added - Message when Shibboleth entitlements need to be manually reviewed -- Support for video listings with more columns - Links to unofficial packages and repology in the readme +### Changed +- Support video listings with more columns +- Use UTF-8 when reading/writing the config file + ### Fixed - Crash during authentication when the Shibboleth session is still valid From 31631fb409d80f7c0cf8dd964da993ef08aa6fe5 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 22:16:47 +0200 Subject: [PATCH 371/524] Increase minimum python version to 3.9 --- .github/workflows/build-and-release.yml | 2 +- CHANGELOG.md | 1 + README.md | 2 +- setup.cfg | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index 565c4e3..090ac7e 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python: ["3.8"] + python: ["3.9"] steps: - uses: actions/checkout@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index e70d328..7cee430 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ ambiguous situations. - Links to unofficial packages and repology in the readme ### Changed +- Increase minimum supported Python version to 3.9 - Support video listings with more columns - Use UTF-8 when reading/writing the config file diff --git a/README.md b/README.md index b8b2551..ce917b0 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Binaries for Linux, Windows and Mac can be downloaded directly from the ### With pip -Ensure you have at least Python 3.8 installed. Run the following command to +Ensure you have at least Python 3.9 installed. 
Run the following command to install PFERD or upgrade it to the latest version: ``` diff --git a/setup.cfg b/setup.cfg index 059798a..2378c48 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ version = attr: PFERD.version.VERSION [options] packages = find: -python_requires = >=3.8 +python_requires = >=3.9 install_requires = aiohttp>=3.8.1 beautifulsoup4>=4.10.0 From 602044ff1b0b49348a50248f7f93334df979044a Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 22:50:06 +0200 Subject: [PATCH 372/524] Fix mypy errors and add missing await --- PFERD/crawl/crawler.py | 5 +++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 21 ++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index 53f43e9..0e67c02 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -1,9 +1,10 @@ import asyncio import os from abc import ABC, abstractmethod +from collections.abc import Awaitable, Coroutine from datetime import datetime from pathlib import Path, PurePath -from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar +from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar from ..auth import Authenticator from ..config import Config, Section @@ -58,7 +59,7 @@ def noncritical(f: Wrapped) -> Wrapped: return wrapper # type: ignore -AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) +AWrapped = TypeVar("AWrapped", bound=Callable[..., Coroutine[Any, Any, Optional[Any]]]) def anoncritical(f: AWrapped) -> AWrapped: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 571e4d7..ae9ebd4 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -1,7 +1,8 @@ import asyncio import re +from collections.abc import Awaitable, Coroutine from pathlib import PurePath -from typing import Any, Awaitable, 
Callable, Dict, List, Optional, Set, TypeVar, Union, cast +from typing import Any, Callable, Dict, List, Optional, Set, Union, cast import aiohttp import yarl @@ -13,7 +14,7 @@ from ...config import Config from ...logging import ProgressBar, log from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param -from ..crawler import CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical +from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement @@ -82,8 +83,6 @@ _VIDEO_ELEMENTS: Set[IliasElementType] = set([ IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, ]) -AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) - def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]: def decorator(f: AWrapped) -> AWrapped: @@ -252,7 +251,7 @@ instance's greatest bottleneck. url: str, parent: IliasPageElement, path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: maybe_cl = await self.crawl(path) if not maybe_cl: return None @@ -310,7 +309,7 @@ instance's greatest bottleneck. self, parent_path: PurePath, element: IliasPageElement, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: if element.url in self._visited_urls: raise CrawlWarning( f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath" @@ -360,7 +359,7 @@ instance's greatest bottleneck. self, element: IliasPageElement, element_path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: log.explain_topic(f"Decision: Crawl Link {fmt_path(element_path)}") log.explain(f"Links type is {self._links}") @@ -407,7 +406,7 @@ instance's greatest bottleneck. 
self, element: IliasPageElement, element_path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: log.explain_topic(f"Decision: Crawl Booking Link {fmt_path(element_path)}") log.explain(f"Links type is {self._links}") @@ -443,7 +442,7 @@ instance's greatest bottleneck. if hdrs.LOCATION not in resp.headers: return soupify(await resp.read()).select_one("a").get("href").strip() - self._authenticate() + await self._authenticate() async with self.session.get(export_url, allow_redirects=False) as resp: # No redirect means we were authenticated @@ -456,7 +455,7 @@ instance's greatest bottleneck. self, element: IliasPageElement, element_path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: # Copy old mapping as it is likely still relevant if self.prev_report: self.report.add_custom_value( @@ -564,7 +563,7 @@ instance's greatest bottleneck. self, element: IliasPageElement, element_path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: maybe_dl = await self.download(element_path, mtime=element.mtime) if not maybe_dl: return None From d2e6d918806310a3bcda7a82c74853b7f59eb99f Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 22:50:36 +0200 Subject: [PATCH 373/524] Make PFERD executable via python -m --- PFERD/__main__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index bdf5b34..4faeb13 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -159,3 +159,7 @@ def main() -> None: sys.exit(1) else: pferd.print_report() + + +if __name__ == "__main__": + main() From aa74604d293ec25ae7f94431d4431313dabfc26c Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 29 Apr 2022 23:11:27 +0200 Subject: [PATCH 374/524] Use utf-8 for report --- PFERD/output_dir.py | 2 +- PFERD/report.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 441717b..c92f4a6 
100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -503,7 +503,7 @@ class OutputDirectory: try: self._prev_report = Report.load(self._report_path) log.explain("Loaded report successfully") - except (OSError, json.JSONDecodeError, ReportLoadError) as e: + except (OSError, UnicodeDecodeError, json.JSONDecodeError, ReportLoadError) as e: log.explain("Failed to load report") log.explain(str(e)) diff --git a/PFERD/report.py b/PFERD/report.py index 0e0c789..0eaaca9 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -100,10 +100,10 @@ class Report: @classmethod def load(cls, path: Path) -> "Report": """ - May raise OSError, JsonDecodeError, ReportLoadError. + May raise OSError, UnicodeDecodeError, JsonDecodeError, ReportLoadError. """ - with open(path) as f: + with open(path, encoding="utf-8") as f: data = json.load(f) if not isinstance(data, dict): @@ -148,7 +148,7 @@ class Report: "encountered_errors": self.encountered_errors, } - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: json.dump(data, f, indent=2, sort_keys=True) f.write("\n") # json.dump doesn't do this From b56475450de9a00a0ab12bfdf9adf9b5b229f38e Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 29 Apr 2022 23:12:41 +0200 Subject: [PATCH 375/524] Use utf-8 for cookies --- PFERD/crawl/http_crawler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index fa4cf29..44ec4dd 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -108,7 +108,7 @@ class HttpCrawler(Crawler): def _load_cookies_from_file(self, path: Path) -> None: jar: Any = http.cookies.SimpleCookie() - with open(path) as f: + with open(path, encoding="utf-8") as f: for i, line in enumerate(f): # Names of headers are case insensitive if line[:11].lower() == "set-cookie:": @@ -121,7 +121,7 @@ class HttpCrawler(Crawler): jar: Any = http.cookies.SimpleCookie() for morsel in self._cookie_jar: jar[morsel.key] = 
morsel - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: f.write(jar.output(sep="\n")) f.write("\n") # A trailing newline is just common courtesy From a8f76e9be76f4bb0ee24030ea252354ede1c8ce4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 29 Apr 2022 23:15:12 +0200 Subject: [PATCH 376/524] Use utf-8 for credential file --- PFERD/auth/credential_file.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PFERD/auth/credential_file.py b/PFERD/auth/credential_file.py index d0fcdda..94ffa73 100644 --- a/PFERD/auth/credential_file.py +++ b/PFERD/auth/credential_file.py @@ -20,8 +20,10 @@ class CredentialFileAuthenticator(Authenticator): path = config.default_section.working_dir() / section.path() try: - with open(path) as f: + with open(path, encoding="utf-8") as f: lines = list(f) + except UnicodeDecodeError: + raise AuthLoadError(f"Credential file at {fmt_real_path(path)} is not encoded using UTF-8") except OSError as e: raise AuthLoadError(f"No credential file at {fmt_real_path(path)}") from e From a241672726529d1a0ed852b1db2df7968ee6f137 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 1 May 2022 22:29:06 +0200 Subject: [PATCH 377/524] Bump version to 3.4.0 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cee430..310059a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. 
## Unreleased +## 3.4.0 - 2022-05-01 + ### Added - Message when Shibboleth entitlements need to be manually reviewed - Links to unofficial packages and repology in the readme diff --git a/PFERD/version.py b/PFERD/version.py index 37e91f3..8102d37 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.3.1" +VERSION = "3.4.0" From b8fe25c580a8cafc14c32890f0635c7daecafc4d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 May 2022 14:13:39 +0200 Subject: [PATCH 378/524] Add `.cpp` to ipd link regex --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 310059a..22fdd29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Changed +- Add `.cpp` to IPD link regex + ## 3.4.0 - 2022-05-01 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 1a5314b..e5ec58f 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -27,7 +27,7 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target def link_regex(self) -> Pattern[str]: - regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|java)$") + regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$") return re.compile(regex) From afbd03f7774a1c0f22c471d98f995153bb08edcd Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:15:48 +0200 Subject: [PATCH 379/524] Fix docs --- CHANGELOG.md | 2 +- CONFIG.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22fdd29..f5af29d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ ambiguous situations. 
## Unreleased ### Changed -- Add `.cpp` to IPD link regex +- Add `cpp` extension to default `link_regex` of IPD crawler ## 3.4.0 - 2022-05-01 diff --git a/CONFIG.md b/CONFIG.md index 569780d..1355c34 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -146,7 +146,7 @@ requests is likely a good idea. - `target`: URL to a KIT-IPD page - `link_regex`: A regex that is matched against the `href` part of links. If it matches, the given link is downloaded as a file. This is used to extract - files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|java)$`) + files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$`) ### The `kit-ilias-web` crawler From bc3fa36637b5a4f4ea26db1a9437e4cbd5cad5c4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:20:45 +0200 Subject: [PATCH 380/524] Fix IPD crawler crashing on weird HTML comments --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5af29d..de7b795 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler +### Fixed +- IPD crawler crashes on some sites + ## 3.4.0 - 2022-05-01 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index e5ec58f..58e71f8 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -161,4 +161,10 @@ class KitIpdCrawler(HttpCrawler): async def get_page(self) -> BeautifulSoup: async with self.session.get(self._url) as request: - return soupify(await request.read()) + # The web page for Algorithmen für Routenplanung contains some + # weird comments that beautifulsoup doesn't parse correctly. This + # hack enables those pages to be crawled, and should hopefully not + # cause issues on other pages. 
+    content = (await request.read()).decode("utf-8") + content = re.sub(r"<!--.*?-->", "", content) + return soupify(content.encode("utf-8")) From af2cc1169ace7154349518f7f709023eeb76ba95 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:23:19 +0200 Subject: [PATCH 381/524] Mention href for users of link_regex option --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de7b795..959fda0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler +- Mention hrefs in IPD crawler for users of `link_regex` option ### Fixed - IPD crawler crashes on some sites diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 58e71f8..78fe0b1 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -45,7 +45,7 @@ class KitIpdFolder: def explain(self) -> None: log.explain_topic(f"Folder {self.name!r}") for file in self.files: - log.explain(f"File {file.name!r}") + log.explain(f"File {file.name!r} (href={file.url!r})") def __hash__(self) -> int: return self.name.__hash__() @@ -113,7 +113,7 @@ class KitIpdCrawler(HttpCrawler): else: file = self._extract_file(element) items.add(file) - log.explain_topic(f"Orphan file {file.name!r}") + log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") log.explain("Attributing it to root folder") return items From 694ffb4d7711265d768a636cf1843e302485c62d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:28:30 +0200 Subject: [PATCH 382/524] Fix meeting date parsing Apparently the new pattern ": <date>," was added. This patch adds support for it. 
--- PFERD/crawl/ilias/kit_ilias_html.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 94b2e4b..dfe111d 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -763,9 +763,14 @@ def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[dateti """ try: date_str = re.sub(r"\s+", " ", date_str) + date_str = re.sub("(Gestern|Yesterday):", "", date_str, re.I) + date_str = re.sub("(Heute|Today):", "", date_str, re.I) + date_str = re.sub("(Morgen|Tomorrow):", "", date_str, re.I) + date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I) + date_str = date_str.strip() for german, english in zip(german_months, english_months): date_str = date_str.replace(german, english) # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" From bcc537468c46088f78a037fb28364866e8653bb5 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:53:37 +0200 Subject: [PATCH 383/524] Fix crawling of expanded meetings The last meeting on every page is expanded by default. Its content is then shown inline *and* in the meeting page itself. We should skip the inline content. --- PFERD/crawl/ilias/kit_ilias_html.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index dfe111d..d93684c 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -428,6 +428,12 @@ class IliasPage: element_type = self._find_type_from_link(element_name, link, abs_url) description = self._find_link_description(link) + # The last meeting on every page is expanded by default. 
+ # Its content is then shown inline *and* in the meeting page itself. + # We should skip the inline content. + if element_type != IliasElementType.MEETING and self._is_in_expanded_meeting(link): + continue + if not element_type: continue if element_type == IliasElementType.MEETING: @@ -445,6 +451,26 @@ class IliasPage: return result + def _is_in_expanded_meeting(self, tag: Tag) -> bool: + """ + Returns whether a file is part of an expanded meeting. + Has false positives for meetings themselves as their title is also "in the expanded meeting content". + It is in the same general div and this whole thing is guesswork. + Therefore, you should check for meetings before passing them in this function. + """ + parents: List[Tag] = list(tag.parents) + for parent in parents: + if not parent.get("class"): + continue + + # We should not crawl files under meetings + if "ilContainerListItemContentCB" in parent.get("class"): + link: Tag = parent.parent.find("a") + type = IliasPage._find_type_from_folder_like(link, self._page_url) + return type == IliasElementType.MEETING + + return False + def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]: """ Interprets accordions and expandable blocks as virtual folders and returns them From 2f0e04ce13ebbc7c7ccaa93e03d8f707f246ceef Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:57:55 +0200 Subject: [PATCH 384/524] Adjust changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 959fda0..4249287 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ ambiguous situations. 
### Fixed - IPD crawler crashes on some sites +- Meeting name normalization for yesterday, today and tomorrow fails +- Crawling of meeting file previews ## 3.4.0 - 2022-05-01 From 616b0480f7c92afe11c36d2c105c99ba5f960e96 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 8 May 2022 17:39:18 +0200 Subject: [PATCH 385/524] Simplify IPD crawler link regex --- CHANGELOG.md | 5 +++-- CONFIG.md | 2 +- PFERD/crawl/kit_ipd_crawler.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4249287..e2d3840 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,11 +24,12 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler -- Mention hrefs in IPD crawler for users of `link_regex` option +- Mention hrefs in IPD crawler's `--explain` output for users of `link_regex` option +- Simplify default IPD crawler `link_regex` ### Fixed - IPD crawler crashes on some sites -- Meeting name normalization for yesterday, today and tomorrow fails +- Meeting name normalization for yesterday, today and tomorrow - Crawling of meeting file previews ## 3.4.0 - 2022-05-01 diff --git a/CONFIG.md b/CONFIG.md index 1355c34..f572a80 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -146,7 +146,7 @@ requests is likely a good idea. - `target`: URL to a KIT-IPD page - `link_regex`: A regex that is matched against the `href` part of links. If it matches, the given link is downloaded as a file. This is used to extract - files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$`) + files from KIT-IPD pages. 
(Default: `^.*?[^/]+\.(pdf|zip|c|cpp|java)$`) ### The `kit-ilias-web` crawler diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 78fe0b1..d9fac32 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -27,7 +27,7 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target def link_regex(self) -> Pattern[str]: - regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$") + regex = self.s.get("link_regex", r"^.*?[^/]+\.(pdf|zip|c|cpp|java)$") return re.compile(regex) From a5015fe9b16d484613a27687f2c122b15e109ba2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 8 May 2022 23:21:18 +0200 Subject: [PATCH 386/524] Correctly parse day-only meeting dates I failed to recognize the correct format in the previous adjustment, so this (hopefully) fixes it for good. Meetings apparently don't always have a time portion. --- PFERD/crawl/ilias/kit_ilias_html.py | 48 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d93684c..6d063b6 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -746,17 +746,26 @@ class IliasPage: Normalizes meeting names, which have a relative time as their first part, to their date in ISO format. """ - date_portion_str = meeting_name.split(" - ")[0] + + # This checks whether we can reach a `:` without passing a `-` + if re.search(r"^[^-]+: ", meeting_name): + # Meeting name only contains date: "05. Jan 2000:" + split_delimiter = ":" + else: + # Meeting name contains date and start/end times: "05. 
Jan 2000, 16:00 - 17:30:" + split_delimiter = ", " + + # We have a meeting day without time + date_portion_str = meeting_name.split(split_delimiter)[0] date_portion = demangle_date(date_portion_str) + # We failed to parse the date, bail out if not date_portion: return meeting_name - rest_of_name = meeting_name - if rest_of_name.startswith(date_portion_str): - rest_of_name = rest_of_name[len(date_portion_str):] - - return datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") + rest_of_name + # Replace the first section with the absolute date + rest_of_name = split_delimiter.join(meeting_name.split(split_delimiter)[1:]) + return datetime.strftime(date_portion, "%Y-%m-%d") + split_delimiter + rest_of_name def _abs_url_from_link(self, link_tag: Tag) -> str: """ @@ -781,17 +790,15 @@ english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[datetime]: """ - Demangle a given date in one of the following formats: + Demangle a given date in one of the following formats (hour/minute part is optional): "Gestern, HH:MM" "Heute, HH:MM" "Morgen, HH:MM" "dd. mon yyyy, HH:MM """ try: + # Normalize whitespace because users date_str = re.sub(r"\s+", " ", date_str) - date_str = re.sub("(Gestern|Yesterday):", "", date_str, re.I) - date_str = re.sub("(Heute|Today):", "", date_str, re.I) - date_str = re.sub("(Morgen|Tomorrow):", "", date_str, re.I) date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) @@ -802,19 +809,28 @@ def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[dateti # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" date_str = date_str.replace(english + ".", english) - # We now have a nice english String in the format: "dd. 
mmm yyyy, hh:mm" - day_part, time_part = date_str.split(",") + # We now have a nice english String in the format: "dd. mmm yyyy, hh:mm" or "dd. mmm yyyy" + + # Check if we have a time as well + if ", " in date_str: + day_part, time_part = date_str.split(",") + else: + day_part = date_str.split(",")[0] + time_part = None + day_str, month_str, year_str = day_part.split(" ") day = int(day_str.strip().replace(".", "")) month = english_months.index(month_str.strip()) + 1 year = int(year_str.strip()) - hour_str, minute_str = time_part.split(":") - hour = int(hour_str) - minute = int(minute_str) + if time_part: + hour_str, minute_str = time_part.split(":") + hour = int(hour_str) + minute = int(minute_str) + return datetime(year, month, day, hour, minute) - return datetime(year, month, day, hour, minute) + return datetime(year, month, day) except Exception: if not fail_silently: log.warn(f"Date parsing failed for {date_str!r}") From 846c29aee1867f7f0b7efae802af47fee77a3ec6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 11 May 2022 21:16:09 +0200 Subject: [PATCH 387/524] Download page descriptions --- CHANGELOG.md | 3 + PFERD/crawl/ilias/ilias_html_cleaner.py | 91 ++++++++++++++++++++++ PFERD/crawl/ilias/kit_ilias_html.py | 25 ++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 29 +++++++ 4 files changed, 148 insertions(+) create mode 100644 PFERD/crawl/ilias/ilias_html_cleaner.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e2d3840..b7cad13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Added +- Download of page descriptions + ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler - Mention hrefs in IPD crawler's `--explain` output for users of `link_regex` option diff --git a/PFERD/crawl/ilias/ilias_html_cleaner.py b/PFERD/crawl/ilias/ilias_html_cleaner.py new file mode 100644 index 0000000..5952309 --- /dev/null +++ b/PFERD/crawl/ilias/ilias_html_cleaner.py @@ -0,0 +1,91 @@ +from bs4 import BeautifulSoup, Comment, Tag + +_STYLE_TAG_CONTENT = """ + .ilc_text_block_Information { + background-color: #f5f7fa; + } + div.ilc_text_block_Standard { + margin-bottom: 10px; + margin-top: 10px; + } + span.ilc_text_inline_Strong { + font-weight: bold; + } + + .accordion-head { + background-color: #f5f7fa; + padding: 0.5rem 0; + } + + h3 { + margin-top: 0.5rem; + margin-bottom: 1rem; + } + + br.visible-break { + margin-bottom: 1rem; + } + + article { + margin: 0.5rem 0; + } + + body { + padding: 1em; + grid-template-columns: 1fr min(60rem, 90%) 1fr; + line-height: 1.2; + } +""" + +_ARTICLE_WORTHY_CLASSES = [ + "ilc_text_block_Information", + "ilc_section_Attention", + "ilc_section_Link", +] + + +def insert_base_markup(soup: BeautifulSoup) -> BeautifulSoup: + head = soup.new_tag("head") + soup.insert(0, head) + + simplecss_link: Tag = soup.new_tag("link") + # + simplecss_link["rel"] = "stylesheet" + simplecss_link["href"] = "https://cdn.simplecss.org/simple.css" + head.append(simplecss_link) + + # Basic style tags for compat + style: Tag = soup.new_tag("style") + style.append(_STYLE_TAG_CONTENT) + head.append(style) + + return soup + + +def clean(soup: BeautifulSoup) -> BeautifulSoup: + for block in soup.find_all(class_=lambda x: x in _ARTICLE_WORTHY_CLASSES): + block.name = "article" + + for block in soup.find_all("h3"): + block.name = "div" + + for block in soup.find_all("h1"): + block.name = "h3" + + for block in soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap"): + block.name = "h3" + block["class"] += 
["accordion-head"] + + for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"): + children = list(dummy.children) + if not children: + dummy.decompose() + if len(children) > 1: + continue + if type(children[0]) == Comment: + dummy.decompose() + + for hrule_imposter in soup.find_all(class_="ilc_section_Separator"): + hrule_imposter.insert(0, soup.new_tag("hr")) + + return soup diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 6d063b6..d58e5c8 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -85,6 +85,31 @@ class IliasPage: log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() + def get_description(self) -> Optional[BeautifulSoup]: + def is_interesting_class(name: str) -> bool: + return name in ["ilCOPageSection", "ilc_Paragraph", "ilc_va_ihcap_VAccordIHeadCap"] + + paragraphs: List[Tag] = self._soup.findAll(class_=is_interesting_class) + if not paragraphs: + return None + + # Extract bits and pieces into a string and parse it again. + # This ensures we don't miss anything and weird structures are resolved + # somewhat gracefully. 
+ raw_html = "" + for p in paragraphs: + if p.find_parent(class_=is_interesting_class): + continue + + # Ignore special listings (like folder groupings) + if "ilc_section_Special" in p["class"]: + continue + + raw_html += str(p) + "\n" + raw_html = f"\n{raw_html}\n" + + return BeautifulSoup(raw_html, "html.parser") + def get_next_stage_element(self) -> Optional[IliasPageElement]: if self._is_ilias_opencast_embedding(): return self.get_child_elements()[0] diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index ae9ebd4..bbed986 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -17,6 +17,7 @@ from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links +from .ilias_html_cleaner import clean, insert_base_markup from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement TargetType = Union[str, int] @@ -215,6 +216,8 @@ instance's greatest bottleneck. cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 elements: List[IliasPageElement] = [] + # A list as variable redefinitions are not propagated to outer scopes + description: List[BeautifulSoup] = [] @_iorepeat(3, "crawling url") async def gather_elements() -> None: @@ -233,9 +236,15 @@ instance's greatest bottleneck. page = IliasPage(soup, url, None) elements.extend(page.get_child_elements()) + if description_string := page.get_description(): + description.append(description_string) + # Fill up our task list with the found elements await gather_elements() + if description: + await self._download_description(PurePath("."), description[0]) + elements.sort(key=lambda e: e.id()) tasks: List[Awaitable[None]] = [] @@ -265,6 +274,8 @@ instance's greatest bottleneck. 
cl: CrawlToken, ) -> None: elements: List[IliasPageElement] = [] + # A list as variable redefinitions are not propagated to outer scopes + description: List[BeautifulSoup] = [] @_iorepeat(3, "crawling folder") async def gather_elements() -> None: @@ -285,10 +296,15 @@ instance's greatest bottleneck. next_stage_url = None elements.extend(page.get_child_elements()) + if description_string := page.get_description(): + description.append(description_string) # Fill up our task list with the found elements await gather_elements() + if description: + await self._download_description(PurePath("."), description[0]) + elements.sort(key=lambda e: e.id()) tasks: List[Awaitable[None]] = [] @@ -425,6 +441,19 @@ instance's greatest bottleneck. return self._download_booking(element, link_template_maybe, maybe_dl) + @anoncritical + @_iorepeat(1, "downloading description") + async def _download_description(self, parent_path: PurePath, description: BeautifulSoup) -> None: + path = parent_path / "Description.html" + dl = await self.download(path, redownload=Redownload.ALWAYS) + if not dl: + return + + async with dl as (bar, sink): + description = clean(insert_base_markup(description)) + sink.file.write(description.prettify().encode("utf-8")) + sink.done() + @anoncritical @_iorepeat(3, "resolving booking") async def _download_booking( From 46fb782798725b6fde76b71cf7a4d90912ea2c7d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 24 May 2022 23:28:09 +0200 Subject: [PATCH 388/524] Add forum crawling This downloads all forum posts when needed and saves each thread in its own html file, named after the thread title. 
--- CHANGELOG.md | 1 + PFERD/cli/command_kit_ilias_web.py | 7 ++ PFERD/crawl/ilias/kit_ilias_html.py | 90 ++++++++++++++- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 122 ++++++++++++++++++--- PFERD/logging.py | 4 +- 5 files changed, 208 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7cad13..1d70c4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Added - Download of page descriptions +- Forum download support ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index 12803a6..de74fc3 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -62,6 +62,11 @@ GROUP.add_argument( action=BooleanOptionalAction, help="crawl and download videos" ) +GROUP.add_argument( + "--forums", + action=BooleanOptionalAction, + help="crawl and download forum posts" +) GROUP.add_argument( "--http-timeout", "-t", type=float, @@ -90,6 +95,8 @@ def load( section["link_redirect_delay"] = str(args.link_redirect_delay) if args.videos is not None: section["videos"] = "yes" if args.videos else "no" + if args.forums is not None: + section["forums"] = "yes" if args.forums else "no" if args.http_timeout is not None: section["http_timeout"] = str(args.http_timeout) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d58e5c8..7bab152 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from datetime import date, datetime, timedelta from enum import Enum -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup, Tag @@ -55,6 +55,20 @@ class IliasPageElement: return self.url +@dataclass +class IliasDownloadForumData: + url: str + form_data: Dict[str, 
Union[str, List[str]]] + + +@dataclass +class IliasForumThread: + title: str + title_tag: Tag + content_tag: Tag + mtime: Optional[datetime] + + class IliasPage: def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): @@ -110,13 +124,39 @@ class IliasPage: return BeautifulSoup(raw_html, "html.parser") + def get_download_forum_data(self) -> Optional[IliasDownloadForumData]: + form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x}) + if not form: + return None + post_url = self._abs_url_from_relative(form["action"]) + + form_data: Dict[str, Union[str, List[ſtr]]] = { + "thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})], + "selected_cmd2": "html", + "select_cmd2": "Ausführen", + "selected_cmd": "", + } + + return IliasDownloadForumData(post_url, form_data) + def get_next_stage_element(self) -> Optional[IliasPageElement]: + if self._is_forum_page(): + if "trows=800" in self._page_url: + return None + return self._get_show_max_forum_entries_per_page_url() if self._is_ilias_opencast_embedding(): return self.get_child_elements()[0] if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: return self._find_video_entries_paginated()[0] return None + def _is_forum_page(self) -> bool: + read_more_btn = self._soup.find( + "button", + attrs={"onclick": lambda x: x and "cmdClass=ilobjforumgui&cmd=markAllRead" in x} + ) + return read_more_btn is not None + def _is_video_player(self) -> bool: return "paella_config_file" in str(self._soup) @@ -194,6 +234,19 @@ class IliasPage: return items + def _get_show_max_forum_entries_per_page_url(self) -> Optional[IliasPageElement]: + correct_link = self._soup.find( + "a", + attrs={"href": lambda x: x and "trows=800" in x and "cmd=showThreads" in x} + ) + + if not correct_link: + return None + + link = self._abs_url_from_link(correct_link) + + return IliasPageElement(IliasElementType.FORUM, link, "show all forum 
threads") + def _find_personal_desktop_entries(self) -> List[IliasPageElement]: items: List[IliasPageElement] = [] @@ -877,3 +930,38 @@ def _tomorrow() -> date: def _sanitize_path_name(name: str) -> str: return name.replace("/", "-").replace("\\", "-").strip() + + +def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThread]: + elements = [] + for p in forum_export.select("body > p"): + title_tag = p + content_tag = p.find_next_sibling("ul") + title = p.find("b").text + if ":" in title: + title = title[title.find(":") + 1:] + title = title.strip() + mtime = _guess_timestamp_from_forum_post_content(content_tag) + elements.append(IliasForumThread(title, title_tag, content_tag, mtime)) + + return elements + + +def _guess_timestamp_from_forum_post_content(content: Tag) -> Optional[datetime]: + posts: Optional[Tag] = content.select(".ilFrmPostHeader > span.small") + if not posts: + return None + + newest_date: Optional[datetime] = None + + for post in posts: + text = post.text.strip() + text = text[text.rfind("|") + 1:] + date = demangle_date(text, fail_silently=True) + if not date: + continue + + if not newest_date or newest_date < date: + newest_date = date + + return newest_date diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index bbed986..156cd4c 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -18,7 +18,8 @@ from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadTo from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links from .ilias_html_cleaner import clean, insert_base_markup -from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement +from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement, + _sanitize_path_name, parse_ilias_forum_export) TargetType = Union[str, int] @@ -67,6 +68,9 @@ class 
KitIliasWebCrawlerSection(HttpCrawlerSection): def videos(self) -> bool: return self.s.getboolean("videos", fallback=False) + def forums(self) -> bool: + return self.s.getboolean("forums", fallback=False) + _DIRECTORY_PAGES: Set[IliasElementType] = set([ IliasElementType.EXERCISE, @@ -183,6 +187,7 @@ instance's greatest bottleneck. self._link_file_redirect_delay = section.link_redirect_delay() self._links = section.links() self._videos = section.videos() + self._forums = section.forums() self._visited_urls: Set[str] = set() async def _run(self) -> None: @@ -335,22 +340,27 @@ instance's greatest bottleneck. element_path = PurePath(parent_path, element.name) if element.type in _VIDEO_ELEMENTS: - log.explain_topic(f"Decision: Crawl video element {fmt_path(element_path)}") if not self._videos: - log.explain("Video crawling is disabled") - log.explain("Answer: no") + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](enable with option 'videos')" + ) return None - else: - log.explain("Video crawling is enabled") - log.explain("Answer: yes") if element.type == IliasElementType.FILE: return await self._handle_file(element, element_path) elif element.type == IliasElementType.FORUM: - log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") - log.explain("Forums are not supported") - log.explain("Answer: No") - return None + if not self._forums: + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](enable with option 'forums')" + ) + return None + return await self._handle_forum(element, element_path) elif element.type == IliasElementType.TEST: log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Tests contain no relevant files") @@ -635,6 +645,68 @@ instance's greatest bottleneck. 
if not await try_stream(): raise CrawlError("File streaming failed after authenticate()") + async def _handle_forum( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Coroutine[Any, Any, None]]: + maybe_cl = await self.crawl(element_path) + if not maybe_cl: + return None + return self._crawl_forum(element, maybe_cl) + + @_iorepeat(3, "crawling forum") + @anoncritical + async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None: + elements = [] + + async with cl: + next_stage_url = element.url + while next_stage_url: + log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {next_stage_url}") + + soup = await self._get_page(next_stage_url) + page = IliasPage(soup, next_stage_url, None) + + if next := page.get_next_stage_element(): + next_stage_url = next.url + else: + break + + download_data = page.get_download_forum_data() + if not download_data: + raise CrawlWarning("Failed to extract forum data") + html = await self._post_authenticated(download_data.url, download_data.form_data) + elements = parse_ilias_forum_export(soupify(html)) + + elements.sort(key=lambda elem: elem.title) + + tasks: List[Awaitable[None]] = [] + for elem in elements: + tasks.append(asyncio.create_task(self._download_forum_thread(cl.path, elem))) + + # And execute them + await self.gather(tasks) + + @anoncritical + @_iorepeat(3, "saving forum thread") + async def _download_forum_thread( + self, + parent_path: PurePath, + element: IliasForumThread, + ) -> None: + path = parent_path / (_sanitize_path_name(element.title) + ".html") + maybe_dl = await self.download(path, mtime=element.mtime) + if not maybe_dl: + return + + async with maybe_dl as (bar, sink): + content = element.title_tag.prettify() + content += element.content_tag.prettify() + sink.file.write(content.encode("utf-8")) + sink.done() + async def _get_page(self, url: str) -> BeautifulSoup: auth_id = await self._current_auth_id() async with 
self.session.get(url) as request: @@ -652,13 +724,37 @@ instance's greatest bottleneck. return soup raise CrawlError("get_page failed even after authenticating") + async def _post_authenticated( + self, + url: str, + data: dict[str, Union[str, List[str]]] + ) -> BeautifulSoup: + auth_id = await self._current_auth_id() + + form_data = aiohttp.FormData() + for key, val in data.items(): + form_data.add_field(key, val) + + async with self.session.post(url, data=form_data(), allow_redirects=False) as request: + if request.status == 200: + return await request.read() + + # We weren't authenticated, so try to do that + await self.authenticate(auth_id) + + # Retry once after authenticating. If this fails, we will die. + async with self.session.post(url, data=data, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + raise CrawlError("post_authenticated failed even after authenticating") + # We repeat this as the login method in shibboleth doesn't handle I/O errors. # Shibboleth is quite reliable as well, the repeat is likely not critical here. - @_iorepeat(3, "Login", failure_is_error=True) + @ _iorepeat(3, "Login", failure_is_error=True) async def _authenticate(self) -> None: await self._shibboleth_login.login(self.session) - @staticmethod + @ staticmethod def _is_logged_in(soup: BeautifulSoup) -> bool: # Normal ILIAS pages mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") diff --git a/PFERD/logging.py b/PFERD/logging.py index e833716..340b21f 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -197,7 +197,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_explain: self.print(f" {escape(text)}") - def status(self, style: str, action: str, text: str) -> None: + def status(self, style: str, action: str, text: str, suffix: str = "") -> None: """ Print a status update while crawling. Allows markup in the "style" argument which will be applied to the "action" string. 
@@ -205,7 +205,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_status: action = escape(f"{action:<{self.STATUS_WIDTH}}") - self.print(f"{style}{action}[/] {escape(text)}") + self.print(f"{style}{action}[/] {escape(text)} {suffix}") def report(self, text: str) -> None: """ From ed24366aba7cfb8ca3cdd0df7b2650bc1220437f Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 Jan 2022 16:23:37 +0100 Subject: [PATCH 389/524] Add pass authenticator --- CHANGELOG.md | 1 + CONFIG.md | 21 ++++++++- PFERD/auth/__init__.py | 3 ++ PFERD/auth/pass_.py | 98 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 PFERD/auth/pass_.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d70c4a..bc9f3e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Added - Download of page descriptions - Forum download support +- `pass` authenticator ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler diff --git a/CONFIG.md b/CONFIG.md index f572a80..0f114ed 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -223,6 +223,23 @@ is stored in the keyring. - `keyring_name`: The service name PFERD uses for storing credentials. (Default: `PFERD`) +### The `pass` authenticator + +This authenticator queries the [`pass` password manager][3] for a username and +password. It tries to be mostly compatible with [browserpass][4] and +[passff][5], so see those links for an overview of the format. If PFERD fails +to load your password, you can use the `--explain` flag to see why. 
+ +- `passname`: The name of the password to use (Required) +- `username_prefixes`: A comma-separated list of username line prefixes + (Default: `login,username,user`) +- `password_prefixes`: A comma-separated list of password line prefixes + (Default: `password,pass,secret`) + +[3]: "Pass: The Standard Unix Password Manager" +[4]: "Organizing password store" +[5]: "Multi-line format" + ### The `tfa` authenticator This authenticator prompts the user on the console for a two-factor @@ -316,7 +333,7 @@ is a regular expression and `TARGET` an f-string based template. If a path matches `SOURCE`, the output path is created using `TARGET` as template. `SOURCE` is automatically anchored. -`TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can +`TARGET` uses Python's [format string syntax][6]. The *n*-th capturing group can be referred to as `{g}` (e.g. `{g3}`). `{g0}` refers to the original path. If capturing group *n*'s contents are a valid integer, the integer value is available as `{i}` (e.g. `{i3}`). 
If capturing group *n*'s contents are a @@ -337,7 +354,7 @@ Example: `f(oo+)/be?ar -re-> B{g1.upper()}H/fear` - Converts `fooooo/bear` into `BOOOOOH/fear` - Converts `foo/bar/baz` into `BOOH/fear/baz` -[3]: "Format String Syntax" +[6]: "Format String Syntax" ### The `-name-re->` arrow diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 277cade..aa3ba8e 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -5,6 +5,7 @@ from ..config import Config from .authenticator import Authenticator, AuthError, AuthLoadError, AuthSection # noqa: F401 from .credential_file import CredentialFileAuthenticator, CredentialFileAuthSection from .keyring import KeyringAuthenticator, KeyringAuthSection +from .pass_ import PassAuthenticator, PassAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator @@ -19,6 +20,8 @@ AUTHENTICATORS: Dict[str, AuthConstructor] = { CredentialFileAuthenticator(n, CredentialFileAuthSection(s), c), "keyring": lambda n, s, c: KeyringAuthenticator(n, KeyringAuthSection(s)), + "pass": lambda n, s, c: + PassAuthenticator(n, PassAuthSection(s)), "simple": lambda n, s, c: SimpleAuthenticator(n, SimpleAuthSection(s)), "tfa": lambda n, s, c: diff --git a/PFERD/auth/pass_.py b/PFERD/auth/pass_.py new file mode 100644 index 0000000..4c8e775 --- /dev/null +++ b/PFERD/auth/pass_.py @@ -0,0 +1,98 @@ +import re +import subprocess +from typing import List, Tuple + +from ..logging import log +from .authenticator import Authenticator, AuthError, AuthSection + + +class PassAuthSection(AuthSection): + def passname(self) -> str: + if (value := self.s.get("passname")) is None: + self.missing_value("passname") + return value + + def username_prefixes(self) -> List[str]: + value = self.s.get("username_prefixes", "login,username,user") + return [prefix.lower() for prefix in value.split(",")] + + def password_prefixes(self) -> List[str]: + value = self.s.get("password_prefixes", "password,pass,secret") + 
return [prefix.lower() for prefix in value.split(",")] + + +class PassAuthenticator(Authenticator): + PREFIXED_LINE_RE = r"([a-zA-Z]+):\s?(.*)" # to be used with fullmatch + + def __init__(self, name: str, section: PassAuthSection) -> None: + super().__init__(name) + + self._passname = section.passname() + self._username_prefixes = section.username_prefixes() + self._password_prefixes = section.password_prefixes() + + async def credentials(self) -> Tuple[str, str]: + log.explain_topic("Obtaining credentials from pass") + + try: + log.explain(f"Calling 'pass show {self._passname}'") + result = subprocess.check_output(["pass", "show", self._passname], text=True) + except subprocess.CalledProcessError as e: + raise AuthError(f"Failed to get password info from {self._passname}: {e}") + + prefixed = {} + unprefixed = [] + for line in result.strip().splitlines(): + if match := re.fullmatch(self.PREFIXED_LINE_RE, line): + prefix = match.group(1).lower() + value = match.group(2) + log.explain(f"Found prefixed line {line!r} with prefix {prefix!r}, value {value!r}") + if prefix in prefixed: + raise AuthError(f"Prefix {prefix} specified multiple times") + prefixed[prefix] = value + else: + log.explain(f"Found unprefixed line {line!r}") + unprefixed.append(line) + + username = None + for prefix in self._username_prefixes: + log.explain(f"Looking for username at prefix {prefix!r}") + if prefix in prefixed: + username = prefixed[prefix] + log.explain(f"Found username {username!r}") + break + + password = None + for prefix in self._password_prefixes: + log.explain(f"Looking for password at prefix {prefix!r}") + if prefix in prefixed: + password = prefixed[prefix] + log.explain(f"Found password {password!r}") + break + + if password is None and username is None: + log.explain("No username and password found so far") + log.explain("Using first unprefixed line as password") + log.explain("Using second unprefixed line as username") + elif password is None: + log.explain("No password 
found so far") + log.explain("Using first unprefixed line as password") + elif username is None: + log.explain("No username found so far") + log.explain("Using first unprefixed line as username") + + if password is None: + if not unprefixed: + log.explain("Not enough unprefixed lines left") + raise AuthError("Password could not be determined") + password = unprefixed.pop(0) + log.explain(f"Found password {password!r}") + + if username is None: + if not unprefixed: + log.explain("Not enough unprefixed lines left") + raise AuthError("Username could not be determined") + username = unprefixed.pop(0) + log.explain(f"Found username {username!r}") + + return username, password From 345f52a1f6f55eecf6c31d3cc1a4350c5200087d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:41:29 +0200 Subject: [PATCH 390/524] Detect new login button --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 156cd4c..c99a920 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -759,7 +759,7 @@ instance's greatest bottleneck. 
# Normal ILIAS pages mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") if mainbar is not None: - login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) + login_button = mainbar.find(attrs={"href": lambda x: x and "login.php" in x}) shib_login = soup.find(id="button_shib_login") return not login_button and not shib_login From d9b111cec252f4b1810f06b0f2ca551cb5cdb2a2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:45:33 +0200 Subject: [PATCH 391/524] Correctly nest description entries --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c99a920..1852c5f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -308,7 +308,7 @@ instance's greatest bottleneck. await gather_elements() if description: - await self._download_description(PurePath("."), description[0]) + await self._download_description(cl.path, description[0]) elements.sort(key=lambda e: e.id()) From aa5a3a10bcbfa0dd54a0dc1a533625f76b2d6ed8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:48:59 +0200 Subject: [PATCH 392/524] Adjust changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc9f3e5..7f35c9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,9 @@ ambiguous situations. 
- IPD crawler crashes on some sites - Meeting name normalization for yesterday, today and tomorrow - Crawling of meeting file previews +- Login with new login button html layout +- Descriptions for courses are now placed in the correct subfolder when + downloading the whole desktop ## 3.4.0 - 2022-05-01 From 66a5b1ba0223848f713192b084f2dcd26a18dbe5 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 17 Aug 2022 13:24:01 +0200 Subject: [PATCH 393/524] Bump version to 3.4.1 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f35c9c..671d48a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.4.1 - 2022-08-17 + ### Added - Download of page descriptions - Forum download support diff --git a/PFERD/version.py b/PFERD/version.py index 8102d37..8832a51 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.0" +VERSION = "3.4.1" From 4a51aaa4f5a1b3382f0bed59f1292fc0952c2832 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 Oct 2022 22:59:33 +0200 Subject: [PATCH 394/524] Fix forum crawling crashing for empty threads --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 671d48a..70d2cd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Fixed +- Forum crawling crashing when parsing empty (= 0 messages) threads + ## 3.4.1 - 2022-08-17 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 7bab152..8795512 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -937,6 +937,13 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThre for p in forum_export.select("body > p"): title_tag = p content_tag = p.find_next_sibling("ul") + + if not content_tag: + # ILIAS allows users to delete the initial post while keeping the thread open + # This produces empty threads without *any* content. + # I am not sure why you would want this, but ILIAS makes it easy to do. + continue + title = p.find("b").text if ":" in title: title = title[title.find(":") + 1:] From d72fc2760b1dd8243ccf21876bb8cc6e027944bb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 13:09:29 +0200 Subject: [PATCH 395/524] Handle empty forums --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 7 +++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 6 +++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70d2cd5..c7a9899 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. 
### Fixed - Forum crawling crashing when parsing empty (= 0 messages) threads +- Forum crawling crashing when a forum has no threads at all ## 3.4.1 - 2022-08-17 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 8795512..9ea6b9f 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -59,6 +59,7 @@ class IliasPageElement: class IliasDownloadForumData: url: str form_data: Dict[str, Union[str, List[str]]] + empty: bool @dataclass @@ -130,14 +131,16 @@ class IliasPage: return None post_url = self._abs_url_from_relative(form["action"]) + thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})] + form_data: Dict[str, Union[str, List[ſtr]]] = { - "thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})], + "thread_ids[]": thread_ids, "selected_cmd2": "html", "select_cmd2": "Ausführen", "selected_cmd": "", } - return IliasDownloadForumData(post_url, form_data) + return IliasDownloadForumData(url=post_url, form_data=form_data, empty=len(thread_ids) == 0) def get_next_stage_element(self) -> Optional[IliasPageElement]: if self._is_forum_page(): diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 1852c5f..f2d5215 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -658,7 +658,7 @@ instance's greatest bottleneck. @_iorepeat(3, "crawling forum") @anoncritical async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None: - elements = [] + elements: List[IliasForumThread] = [] async with cl: next_stage_url = element.url @@ -677,6 +677,10 @@ instance's greatest bottleneck. 
download_data = page.get_download_forum_data() if not download_data: raise CrawlWarning("Failed to extract forum data") + if download_data.empty: + log.explain("Forum had no threads") + elements = [] + return html = await self._post_authenticated(download_data.url, download_data.form_data) elements = parse_ilias_forum_export(soupify(html)) From fb4631ba180a9ff0303d59e798d4bccfa0253666 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 13:13:36 +0200 Subject: [PATCH 396/524] Fix ilias background login --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 35 ++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index f2d5215..10a270f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -23,6 +23,12 @@ from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, Ilia TargetType = Union[str, int] +_ILIAS_URL = "https://ilias.studium.kit.edu" + + +class KitShibbolethBackgroundLoginSuccessful(): + pass + class KitIliasWebCrawlerSection(HttpCrawlerSection): def target(self) -> TargetType: @@ -36,7 +42,7 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection): if target == "desktop": # Full personal desktop return target - if target.startswith("https://ilias.studium.kit.edu"): + if target.startswith(_ILIAS_URL): # ILIAS URL return target @@ -181,7 +187,7 @@ instance's greatest bottleneck. 
section.tfa_auth(authenticators), ) - self._base_url = "https://ilias.studium.kit.edu" + self._base_url = _ILIAS_URL self._target = section.target() self._link_file_redirect_delay = section.link_redirect_delay() @@ -808,14 +814,17 @@ class KitShibbolethLogin: # Equivalent: Click on "Mit KIT-Account anmelden" button in # https://ilias.studium.kit.edu/login.php - url = "https://ilias.studium.kit.edu/shib_login.php" + url = f"{_ILIAS_URL}/shib_login.php" data = { "sendLogin": "1", "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", "il_target": "", "home_organization_selection": "Weiter", } - soup: BeautifulSoup = await _shib_post(sess, url, data) + soup: Union[BeautifulSoup, KitShibbolethBackgroundLoginSuccessful] = await _shib_post(sess, url, data) + + if isinstance(soup, KitShibbolethBackgroundLoginSuccessful): + return # Attempt to login using credentials, if necessary while not self._login_successful(soup): @@ -854,7 +863,7 @@ class KitShibbolethLogin: # (or clicking "Continue" if you have JS disabled) relay_state = soup.find("input", {"name": "RelayState"}) saml_response = soup.find("input", {"name": "SAMLResponse"}) - url = "https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST" + url = f"{_ILIAS_URL}/Shibboleth.sso/SAML2/POST" data = { # using the info obtained in the while loop above "RelayState": relay_state["value"], "SAMLResponse": saml_response["value"], @@ -903,22 +912,35 @@ async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> Beautifu return soupify(await response.read()) -async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: +async def _shib_post( + session: aiohttp.ClientSession, + url: str, + data: Any +) -> Union[BeautifulSoup, KitShibbolethBackgroundLoginSuccessful]: """ aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected by Shibboleth. Thanks a lot. 
So now we unroll the requests manually, parse location headers and build encoded URL objects ourselves... Who thought mangling location header was a good idea?? """ + log.explain_topic("Shib login POST") async with session.post(url, data=data, allow_redirects=False) as response: location = response.headers.get("location") + log.explain(f"Got location {location!r}") if not location: raise CrawlWarning(f"Login failed (1), no location header present at {url}") correct_url = yarl.URL(location, encoded=True) + log.explain(f"Corrected location to {correct_url!r}") + + if str(correct_url).startswith(_ILIAS_URL): + log.explain("ILIAS recognized our shib token and logged us in in the background, returning") + return KitShibbolethBackgroundLoginSuccessful() async with session.get(correct_url, allow_redirects=False) as response: location = response.headers.get("location") + log.explain(f"Redirected to {location!r} with status {response.status}") # If shib still still has a valid session, it will directly respond to the request if location is None: + log.explain("Shib recognized us, returning its response directly") return soupify(await response.read()) as_yarl = yarl.URL(response.url) @@ -932,6 +954,7 @@ async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> Bea path=location, encoded=True ) + log.explain(f"Corrected location to {correct_url!r}") async with session.get(correct_url, allow_redirects=False) as response: return soupify(await response.read()) From 5fdd40204b156b15c008ec1dee05e168672fe243 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 14:33:58 +0200 Subject: [PATCH 397/524] Unwrap future meetings when ILIAS hides them behind a pagination --- PFERD/crawl/ilias/kit_ilias_html.py | 20 +++++++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 29 ++++++++++++++-------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 
9ea6b9f..2f0011e 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -146,11 +146,17 @@ class IliasPage: if self._is_forum_page(): if "trows=800" in self._page_url: return None + log.explain("Requesting *all* forum threads") return self._get_show_max_forum_entries_per_page_url() if self._is_ilias_opencast_embedding(): + log.explain("Unwrapping opencast embedding") return self.get_child_elements()[0] if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: + log.explain("Unwrapping video pagination") return self._find_video_entries_paginated()[0] + if self._contains_collapsed_future_meetings(): + log.explain("Requesting *all* future meetings") + return self._uncollapse_future_meetings_url() return None def _is_forum_page(self) -> bool: @@ -203,6 +209,16 @@ class IliasPage: return False return "target=copa_" in link.get("value") + def _contains_collapsed_future_meetings(self) -> bool: + return self._uncollapse_future_meetings_url() is not None + + def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]: + element = self._soup.find("a", attrs={"href": lambda x: x and "crs_next_sess=1" in x}) + if not element: + return None + link = self._abs_url_from_link(element) + return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings") + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. 
The actual video stream url is nowhere @@ -793,6 +809,10 @@ class IliasPage: if img_tag is None: img_tag = found_parent.select_one("img.icon") + if img_tag is None and found_parent.find("a", attrs={"href": lambda x: x and "crs_next_sess=" in x}): + log.explain("Found session expansion button, skipping it as it has no content") + return None + if img_tag is None: _unexpected_html_warning() log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 10a270f..bc0d816 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -234,19 +234,28 @@ instance's greatest bottleneck. async def gather_elements() -> None: elements.clear() async with cl: - soup = await self._get_page(url) - - if expected_id is not None: - perma_link_element: Tag = soup.find(id="current_perma_link") - if not perma_link_element or "crs_" not in perma_link_element.get("value"): - raise CrawlError("Invalid course id? Didn't find anything looking like a course") + next_stage_url: Optional[str] = url + current_parent = None # Duplicated code, but the root page is special - we want to avoid fetching it twice! - log.explain_topic("Parsing root HTML page") - log.explain(f"URL: {url}") - page = IliasPage(soup, url, None) - elements.extend(page.get_child_elements()) + while next_stage_url: + soup = await self._get_page(next_stage_url) + if current_parent is None and expected_id is not None: + perma_link_element: Tag = soup.find(id="current_perma_link") + if not perma_link_element or "crs_" not in perma_link_element.get("value"): + raise CrawlError("Invalid course id? 
Didn't find anything looking like a course") + + log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {next_stage_url}") + page = IliasPage(soup, next_stage_url, current_parent) + if next_element := page.get_next_stage_element(): + current_parent = next_element + next_stage_url = next_element.url + else: + next_stage_url = None + + elements.extend(page.get_child_elements()) if description_string := page.get_description(): description.append(description_string) From e1430e629844ad122a78d18197ed54100c734bbb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 18:36:34 +0200 Subject: [PATCH 398/524] Handle (and ignore) surveys --- PFERD/crawl/ilias/kit_ilias_html.py | 3 +++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 2f0011e..d969577 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -24,6 +24,7 @@ class IliasElementType(Enum): LINK = "link" BOOKING = "booking" MEETING = "meeting" + SURVEY = "survey" VIDEO = "video" VIDEO_PLAYER = "video_player" VIDEO_FOLDER = "video_folder" @@ -730,6 +731,8 @@ class IliasPage: return IliasElementType.TEST if "fold" in icon["class"]: return IliasElementType.FOLDER + if "svy" in icon["class"]: + return IliasElementType.SURVEY _unexpected_html_warning() log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index bc0d816..5ff8212 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -380,6 +380,13 @@ instance's greatest bottleneck. 
log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Tests contain no relevant files") log.explain("Answer: No") + elif element.type == IliasElementType.SURVEY: + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](surveys contain no relevant data)" + ) return None elif element.type == IliasElementType.LINK: return await self._handle_link(element, element_path) From 1b6be6bd79112faea6e56c43f4756dde10ba00ba Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 18:36:54 +0200 Subject: [PATCH 399/524] Handle content pages in cards --- PFERD/crawl/ilias/kit_ilias_html.py | 2 ++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d969577..ee0364a 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -731,6 +731,8 @@ class IliasPage: return IliasElementType.TEST if "fold" in icon["class"]: return IliasElementType.FOLDER + if "copa" in icon["class"]: + return IliasElementType.FOLDER if "svy" in icon["class"]: return IliasElementType.SURVEY diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 5ff8212..9295e93 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -377,9 +377,13 @@ instance's greatest bottleneck. 
return None return await self._handle_forum(element, element_path) elif element.type == IliasElementType.TEST: - log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") - log.explain("Tests contain no relevant files") - log.explain("Answer: No") + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](tests contain no relevant data)" + ) + return None elif element.type == IliasElementType.SURVEY: log.status( "[bold bright_black]", From f47d2f11d843bfd3307815b231dd3e3df0265cef Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 Oct 2022 20:28:06 +0200 Subject: [PATCH 400/524] Append trailing slash to kit-ipd links to ensure urljoin works as expected --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7a9899..24d9fa6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Fixed - Forum crawling crashing when parsing empty (= 0 messages) threads - Forum crawling crashing when a forum has no threads at all +- kit-ipd crawler if URL did not end with a trailing slash ## 3.4.1 - 2022-08-17 diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index d9fac32..338e059 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -24,6 +24,9 @@ class KitIpdCrawlerSection(HttpCrawlerSection): if not target.startswith("https://"): self.invalid_value("target", target, "Should be a URL") + if not target.endswith("/"): + target = target + "/" + return target def link_regex(self) -> Pattern[str]: From 37b51a66d87d368afc3bef2b81edf1629f95cd57 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 Oct 2022 18:22:37 +0200 Subject: [PATCH 401/524] Update changelog --- CHANGELOG.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24d9fa6..2bb0231 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,10 +22,16 @@ 
ambiguous situations. ## Unreleased +### Added +- Recognize and crawl content pages in cards +- Recognize and ignore surveys + ### Fixed -- Forum crawling crashing when parsing empty (= 0 messages) threads +- Forum crawling crashing when a thread has no messages at all - Forum crawling crashing when a forum has no threads at all -- kit-ipd crawler if URL did not end with a trailing slash +- Ilias login failing in some cases +- Crawling of paginated future meetings +- IPD crawler handling of URLs without trailing slash ## 3.4.1 - 2022-08-17 From 259cfc20cccae68a2f34984796405a35a7f31707 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 Oct 2022 18:26:17 +0200 Subject: [PATCH 402/524] Bump version to 3.4.2 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bb0231..9ecddf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.4.2 - 2022-10-26 + ### Added - Recognize and crawl content pages in cards - Recognize and ignore surveys diff --git a/PFERD/version.py b/PFERD/version.py index 8832a51..0ef5d89 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.1" +VERSION = "3.4.2" From c020cccc64f152882688b119416f0582ec94e074 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Oct 2022 14:08:29 +0200 Subject: [PATCH 403/524] Include found paths in "second path found" warning --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 2 +- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 8 +++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ecddf7..3dd25b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Changed +- Clear up error message shown when multiple paths are found to an element + ## 3.4.2 - 2022-10-26 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index ee0364a..56dcf7b 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -134,7 +134,7 @@ class IliasPage: thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})] - form_data: Dict[str, Union[str, List[ſtr]]] = { + form_data: Dict[str, Union[str, List[str]]] = { "thread_ids[]": thread_ids, "selected_cmd2": "html", "select_cmd2": "Ausführen", diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 9295e93..e3719b8 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -194,7 +194,7 @@ instance's greatest bottleneck. self._links = section.links() self._videos = section.videos() self._forums = section.forums() - self._visited_urls: Set[str] = set() + self._visited_urls: Dict[str, PurePath] = dict() async def _run(self) -> None: if isinstance(self._target, int): @@ -348,9 +348,11 @@ instance's greatest bottleneck. ) -> Optional[Coroutine[Any, Any, None]]: if element.url in self._visited_urls: raise CrawlWarning( - f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath" + f"Found second path to element {element.name!r} at {element.url!r}. " + + f"First path: {fmt_path(self._visited_urls[element.url])}. " + + f"Second path: {fmt_path(parent_path)}." 
) - self._visited_urls.add(element.url) + self._visited_urls[element.url] = parent_path element_path = PurePath(parent_path, element.name) From 07200bbde5fb72f2f846101b92b440724c8c7959 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 Oct 2022 14:10:45 +0100 Subject: [PATCH 404/524] Document ilias web crawler's forums option --- CHANGELOG.md | 3 +++ CONFIG.md | 1 + 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dd25b8..e5e81d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Added +- Missing documentation for `forums` option + ### Changed - Clear up error message shown when multiple paths are found to an element diff --git a/CONFIG.md b/CONFIG.md index 0f114ed..1ca43c4 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -181,6 +181,7 @@ script once per day should be fine. redirect to the actual URL. Set to a negative value to disable the automatic redirect. (Default: `-1`) - `videos`: Whether to download videos. (Default: `no`) +- `forums`: Whether to download forum threads. (Default: `no`) - `http_timeout`: The timeout (in seconds) for all HTTP requests. (Default: `20.0`) From e69b55b3496d58bc19d76429ca0078ab10f23074 Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Fri, 4 Nov 2022 12:18:26 +0100 Subject: [PATCH 405/524] Add more unofficial package managers (#66) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index ce917b0..31a3475 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,10 @@ The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. 
Unofficial packages are available for: - [AUR](https://aur.archlinux.org/packages/pferd) +- [brew](https://formulae.brew.sh/formula/pferd) +- [conda-forge](https://github.com/conda-forge/pferd-feedstock) - [nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/pferd/default.nix) +- [PyPi](https://pypi.org/project/pferd) See also PFERD's [repology page](https://repology.org/project/pferd/versions). From 635caa765decd9a747d8b313252fd6b56cea0951 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 15 Nov 2022 17:17:55 +0100 Subject: [PATCH 406/524] Fix typo Thanks, burg113 --- CONFIG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONFIG.md b/CONFIG.md index 1ca43c4..640e4af 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -290,7 +290,7 @@ path matches `SOURCE`, it is renamed to `TARGET`. Example: `foo/bar --> baz` - Doesn't match `foo`, `a/foo/bar` or `foo/baz` - Converts `foo/bar` into `baz` -- Converts `foo/bar/wargl` into `bar/wargl` +- Converts `foo/bar/wargl` into `baz/wargl` Example: `foo/bar --> !` - Doesn't match `foo`, `a/foo/bar` or `foo/baz` From c0d6d8b22975234b0c9141a22307c8036698566c Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 21 Nov 2022 17:53:30 +0100 Subject: [PATCH 407/524] Use url after redirect for relative links --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 27 ++++++++++++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5e81d6..5bbefd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,9 @@ ambiguous situations. 
### Changed - Clear up error message shown when multiple paths are found to an element +### Fixed +- IPD crawler unnecessarily appending trailing slashes + ## 3.4.2 - 2022-10-26 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 338e059..c852be0 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -2,7 +2,7 @@ import os import re from dataclasses import dataclass from pathlib import PurePath -from typing import Awaitable, List, Optional, Pattern, Set, Union +from typing import Awaitable, List, Optional, Pattern, Set, Tuple, Union from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag @@ -24,9 +24,6 @@ class KitIpdCrawlerSection(HttpCrawlerSection): if not target.startswith("https://"): self.invalid_value("target", target, "Should be a URL") - if not target.endswith("/"): - target = target + "/" - return target def link_regex(self) -> Pattern[str]: @@ -102,32 +99,32 @@ class KitIpdCrawler(HttpCrawler): await self._stream_from_url(file.url, sink, bar) async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]: - page = await self.get_page() + page, url = await self.get_page() elements: List[Tag] = self._find_file_links(page) items: Set[Union[KitIpdFile, KitIpdFolder]] = set() for element in elements: folder_label = self._find_folder_label(element) if folder_label: - folder = self._extract_folder(folder_label) + folder = self._extract_folder(folder_label, url) if folder not in items: items.add(folder) folder.explain() else: - file = self._extract_file(element) + file = self._extract_file(element, url) items.add(file) log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") log.explain("Attributing it to root folder") return items - def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: + def _extract_folder(self, folder_tag: Tag, url: str) -> KitIpdFolder: files: List[KitIpdFile] = [] name = folder_tag.getText().strip() container: Tag = 
folder_tag.findNextSibling(name="table") for link in self._find_file_links(container): - files.append(self._extract_file(link)) + files.append(self._extract_file(link, url)) return KitIpdFolder(name, files) @@ -138,16 +135,16 @@ class KitIpdCrawler(HttpCrawler): return None return enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) - def _extract_file(self, link: Tag) -> KitIpdFile: - url = self._abs_url_from_link(link) + def _extract_file(self, link: Tag, url: str) -> KitIpdFile: + url = self._abs_url_from_link(url, link) name = os.path.basename(url) return KitIpdFile(name, url) def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]: return tag.findAll(name="a", attrs={"href": self._file_regex}) - def _abs_url_from_link(self, link_tag: Tag) -> str: - return urljoin(self._url, link_tag.get("href")) + def _abs_url_from_link(self, url: str, link_tag: Tag) -> str: + return urljoin(url, link_tag.get("href")) async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: async with self.session.get(url, allow_redirects=False) as resp: @@ -162,7 +159,7 @@ class KitIpdCrawler(HttpCrawler): sink.done() - async def get_page(self) -> BeautifulSoup: + async def get_page(self) -> Tuple[BeautifulSoup, str]: async with self.session.get(self._url) as request: # The web page for Algorithmen für Routenplanung contains some # weird comments that beautifulsoup doesn't parse correctly. This @@ -170,4 +167,4 @@ class KitIpdCrawler(HttpCrawler): # cause issues on other pages. 
content = (await request.read()).decode("utf-8") content = re.sub(r"", "", content) - return soupify(content.encode("utf-8")) + return soupify(content.encode("utf-8")), str(request.url) From 55a2de6b88bbd2ee0cb031271e7045f53caa1702 Mon Sep 17 00:00:00 2001 From: c0derMo Date: Fri, 25 Nov 2022 10:25:22 +0000 Subject: [PATCH 408/524] Fix crawling English opencast --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bbefd4..1dc5abc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ ambiguous situations. ### Fixed - IPD crawler unnecessarily appending trailing slashes +- Crawling opencast when ILIAS is set to English ## 3.4.2 - 2022-10-26 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 56dcf7b..c0ebdc9 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -366,7 +366,7 @@ class IliasPage: """ # Video start links are marked with an "Abspielen" link video_links: List[Tag] = self._soup.findAll( - name="a", text=re.compile(r"\s*Abspielen\s*") + name="a", text=re.compile(r"\s*(Abspielen|Play)\s*") ) results: List[IliasPageElement] = [] From 6d44aac2783c69031e7686263fc0a2285912376f Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 29 Nov 2022 18:22:19 +0100 Subject: [PATCH 409/524] Bump version to 3.4.3 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dc5abc..8793d43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. 
## Unreleased +## 3.4.3 - 2022-11-29 + ### Added - Missing documentation for `forums` option diff --git a/PFERD/version.py b/PFERD/version.py index 0ef5d89..7043d78 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.2" +VERSION = "3.4.3" From 722d2eb393913e770aff17da6b5b3b6603d1ee67 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 25 Nov 2022 12:49:36 +0100 Subject: [PATCH 410/524] Fix crawling of courses with preselected timeline tab --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8793d43..b1d18cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Fixed +- Crawling of courses with the timeline view as the default tab + ## 3.4.3 - 2022-11-29 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index c0ebdc9..44e44d9 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -158,6 +158,8 @@ class IliasPage: if self._contains_collapsed_future_meetings(): log.explain("Requesting *all* future meetings") return self._uncollapse_future_meetings_url() + if not self._is_content_tab_selected(): + return self._select_content_page_url() return None def _is_forum_page(self) -> bool: @@ -220,6 +222,27 @@ class IliasPage: link = self._abs_url_from_link(element) return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings") + def _is_content_tab_selected(self) -> bool: + return self._select_content_page_url() is None + + def _select_content_page_url(self) -> Optional[IliasPageElement]: + tab = self._soup.find( + id="tab_view_content", + attrs={"class": lambda x: x is not None and "active" not in x} + ) + # Already selected (or not found) + if not tab: + return None + link = tab.find("a") + if link: + link = self._abs_url_from_link(link) + return 
IliasPageElement(IliasElementType.FOLDER, link, "select content page") + + _unexpected_html_warning() + log.warn_contd(f"Could not find content tab URL on {self._page_url!r}.") + log.warn_contd("PFERD might not find content on the course's main page.") + return None + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. The actual video stream url is nowhere From 467fc526e8411d4a5113dbb78747aa119981c476 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 21 Mar 2023 23:52:24 +0100 Subject: [PATCH 411/524] Fix crawling of file/video cards --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1d18cd..c27059b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Fixed - Crawling of courses with the timeline view as the default tab +- Crawling of file and custom opencast cards ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 44e44d9..079cfd6 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -738,7 +738,7 @@ class IliasPage: icon: Tag = card_root.select_one(".il-card-repository-head .icon") - if "opencast" in icon["class"]: + if "opencast" in icon["class"] or "xoct" in icon["class"]: return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED if "exc" in icon["class"]: return IliasElementType.EXERCISE @@ -758,6 +758,8 @@ class IliasPage: return IliasElementType.FOLDER if "svy" in icon["class"]: return IliasElementType.SURVEY + if "file" in icon["class"]: + return IliasElementType.FILE _unexpected_html_warning() log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") From 6f30c6583d6512c92042c581e86027a4341ddc89 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 21 
Mar 2023 23:52:33 +0100 Subject: [PATCH 412/524] Fix crawling of cards without descriptions --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c27059b..7a5f654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Fixed - Crawling of courses with the timeline view as the default tab - Crawling of file and custom opencast cards +- Crawling of button cards without descriptions ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 079cfd6..efe6757 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -708,7 +708,11 @@ class IliasPage: "div", attrs={"class": lambda x: x and "caption" in x}, ) - description = caption_parent.find_next_sibling("div").getText().strip() + caption_container = caption_parent.find_next_sibling("div") + if caption_container: + description = caption_container.getText().strip() + else: + description = None if not type: _unexpected_html_warning() From 0294ceb7d5ff074dcc2566872d6b5f64f99c598f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 22 Mar 2023 00:08:19 +0100 Subject: [PATCH 413/524] Update github action versions --- .github/workflows/build-and-release.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index 090ac7e..83a36e4 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -17,9 +17,9 @@ jobs: python: ["3.9"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} @@ -45,7 +45,7 @@ jobs: run: mv dist/pferd* dist/pferd-${{ matrix.os }} - name: Upload binary - uses: actions/upload-artifact@v2 + uses: 
actions/upload-artifact@v3 with: name: Binaries path: dist/pferd-${{ matrix.os }} @@ -57,7 +57,7 @@ jobs: steps: - name: Download binaries - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: Binaries From 443f7fe83913bcb82a42d7b70d4d05df65f05278 Mon Sep 17 00:00:00 2001 From: "Mr. Pine" Date: Sat, 29 Jul 2023 17:54:42 +0200 Subject: [PATCH 414/524] Add `no-delete-prompt-overwrite` crawler conflict resolution option (#75) --- CHANGELOG.md | 3 +++ CONFIG.md | 2 ++ LICENSE | 3 ++- PFERD/output_dir.py | 11 ++++++----- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a5f654..22522e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,9 @@ ambiguous situations. - Crawling of file and custom opencast cards - Crawling of button cards without descriptions +### Added +- `no-delete-prompt-override` conflict resolution strategy + ## 3.4.3 - 2022-11-29 ### Added diff --git a/CONFIG.md b/CONFIG.md index 640e4af..84ee885 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -75,6 +75,8 @@ common to all crawlers: using `prompt` and always choosing "yes". - `no-delete`: Never delete local files, but overwrite local files if the remote file is different. + - `no-delete-prompt-overwrite`: Never delete local files, but prompt to overwrite local files if the + remote file is different. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). 
(Default: empty) - `tasks`: The maximum number of concurrent tasks (such as crawling or diff --git a/LICENSE b/LICENSE index fe2293f..d81e827 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,6 @@ Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, - TheChristophe, Scriptim, thelukasprobst, Toorero + TheChristophe, Scriptim, thelukasprobst, Toorero, + Mr-Pine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index c92f4a6..38d1288 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -44,6 +44,7 @@ class OnConflict(Enum): LOCAL_FIRST = "local-first" REMOTE_FIRST = "remote-first" NO_DELETE = "no-delete" + NO_DELETE_PROMPT_OVERWRITE = "no-delete-prompt-overwrite" @staticmethod def from_string(string: str) -> "OnConflict": @@ -51,7 +52,7 @@ class OnConflict(Enum): return OnConflict(string) except ValueError: raise ValueError("must be one of 'prompt', 'local-first'," - " 'remote-first', 'no-delete'") + " 'remote-first', 'no-delete', 'no-delete-prompt-overwrite'") @dataclass @@ -264,7 +265,7 @@ class OutputDirectory: on_conflict: OnConflict, path: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Replace {fmt_path(path)} with remote file?" return await prompt_yes_no(prompt, default=False) @@ -283,7 +284,7 @@ class OutputDirectory: on_conflict: OnConflict, path: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?" 
return await prompt_yes_no(prompt, default=False) @@ -303,7 +304,7 @@ class OutputDirectory: path: PurePath, parent: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?" return await prompt_yes_no(prompt, default=False) @@ -330,7 +331,7 @@ class OutputDirectory: return False elif on_conflict == OnConflict.REMOTE_FIRST: return True - elif on_conflict == OnConflict.NO_DELETE: + elif on_conflict in {OnConflict.NO_DELETE, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: return False # This should never be reached From d204dac8ced63534ca2b4596e9a63c880b2077a3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 2 Jun 2023 18:19:39 +0200 Subject: [PATCH 415/524] Detect unexpected root page redirects and abort operation --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 10 ++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 20 ++++++++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22522e2..ee55659 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ ambiguous situations. 
- Crawling of courses with the timeline view as the default tab - Crawling of file and custom opencast cards - Crawling of button cards without descriptions +- Abort crawling when encountering an unexpected ilias root page redirect ### Added - `no-delete-prompt-override` conflict resolution strategy diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index efe6757..aed2069 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -79,6 +79,16 @@ class IliasPage: self._page_type = source_element.type if source_element else None self._source_name = source_element.name if source_element else "" + @staticmethod + def is_root_page(soup: BeautifulSoup) -> bool: + permalink = soup.find(id="current_perma_link") + if permalink is None: + return False + value = permalink.attrs.get("value") + if value is None: + return False + return "goto.php?target=root_" in value + def get_child_elements(self) -> List[IliasPageElement]: """ Return all child page elements you can find here. diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index e3719b8..ae49edc 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -239,7 +239,7 @@ instance's greatest bottleneck. # Duplicated code, but the root page is special - we want to avoid fetching it twice! while next_stage_url: - soup = await self._get_page(next_stage_url) + soup = await self._get_page(next_stage_url, root_page_allowed=True) if current_parent is None and expected_id is not None: perma_link_element: Tag = soup.find(id="current_perma_link") @@ -739,12 +739,12 @@ instance's greatest bottleneck. 
sink.file.write(content.encode("utf-8")) sink.done() - async def _get_page(self, url: str) -> BeautifulSoup: + async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup: auth_id = await self._current_auth_id() async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): - return soup + return self._verify_page(soup, url, root_page_allowed) # We weren't authenticated, so try to do that await self.authenticate(auth_id) @@ -753,9 +753,21 @@ instance's greatest bottleneck. async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): - return soup + return self._verify_page(soup, url, root_page_allowed) raise CrawlError("get_page failed even after authenticating") + def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup: + if IliasPage.is_root_page(soup) and not root_page_allowed: + raise CrawlError( + "Unexpectedly encountered ILIAS root page. " + "This usually happens because the ILIAS instance is broken. " + "If so, wait a day or two and try again. " + "It could also happen because a crawled element links to the ILIAS root page. " + "If so, use a transform with a ! as target to ignore the particular element. 
" + f"The redirect came from {url}" + ) + return soup + async def _post_authenticated( self, url: str, From 123a57beec37090310f76df3746e6ce107ceb299 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Jul 2023 18:14:57 +0200 Subject: [PATCH 416/524] Fix mypy unreachable error in file_templates --- PFERD/crawl/ilias/file_templates.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 151a41b..59123a2 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -102,24 +102,24 @@ class Links(Enum): INTERNET_SHORTCUT = "internet-shortcut" def template(self) -> Optional[str]: - if self == self.FANCY: + if self == Links.FANCY: return _link_template_fancy - elif self == self.PLAINTEXT: + elif self == Links.PLAINTEXT: return _link_template_plain - elif self == self.INTERNET_SHORTCUT: + elif self == Links.INTERNET_SHORTCUT: return _link_template_internet_shortcut - elif self == self.IGNORE: + elif self == Links.IGNORE: return None raise ValueError("Missing switch case") def extension(self) -> Optional[str]: - if self == self.FANCY: + if self == Links.FANCY: return ".html" - elif self == self.PLAINTEXT: + elif self == Links.PLAINTEXT: return ".txt" - elif self == self.INTERNET_SHORTCUT: + elif self == Links.INTERNET_SHORTCUT: return ".url" - elif self == self.IGNORE: + elif self == Links.IGNORE: return None raise ValueError("Missing switch case") From 68c398f1fea5cfefd86d11e79f2f6582d50e6563 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Jul 2023 23:23:10 +0200 Subject: [PATCH 417/524] Add support for ILIAS learning modules --- CHANGELOG.md | 1 + PFERD/crawl/ilias/file_templates.py | 69 +++++++++ PFERD/crawl/ilias/ilias_html_cleaner.py | 2 +- PFERD/crawl/ilias/kit_ilias_html.py | 46 ++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 160 ++++++++++++++++++++- 5 files changed, 272 insertions(+), 6 deletions(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index ee55659..6e3925c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ ambiguous situations. ### Added - `no-delete-prompt-override` conflict resolution strategy +- support for ILIAS learning modules ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 59123a2..b206461 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -1,6 +1,10 @@ from enum import Enum from typing import Optional +import bs4 + +from PFERD.utils import soupify + _link_template_plain = "{{link}}" _link_template_fancy = """ @@ -94,6 +98,71 @@ _link_template_internet_shortcut = """ URL={{link}} """.strip() +_learning_module_template = """ + + + + + {{name}} + + + + +{{body}} + + +""" + + +def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str: + # Seems to be comments, ignore those. + for elem in body.select(".il-copg-mob-fullscreen-modal"): + elem.decompose() + + nav_template = """ + + """ + if prev and body.select_one(".ilc_page_lnav_LeftNavigation"): + text = body.select_one(".ilc_page_lnav_LeftNavigation").getText().strip() + left = f'{text}' + else: + left = "" + + if next and body.select_one(".ilc_page_rnav_RightNavigation"): + text = body.select_one(".ilc_page_rnav_RightNavigation").getText().strip() + right = f'{text}' + else: + right = "" + + if top_nav := body.select_one(".ilc_page_tnav_TopNavigation"): + top_nav.replace_with( + soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode()) + ) + + if bot_nav := body.select_one(".ilc_page_bnav_BottomNavigation"): + bot_nav.replace_with(soupify(nav_template.replace( + "{{left}}", left).replace("{{right}}", right).encode()) + ) + + body = body.prettify() + return _learning_module_template.replace("{{body}}", body).replace("{{name}}", name) + class Links(Enum): IGNORE = "ignore" diff --git 
a/PFERD/crawl/ilias/ilias_html_cleaner.py b/PFERD/crawl/ilias/ilias_html_cleaner.py index 5952309..5495304 100644 --- a/PFERD/crawl/ilias/ilias_html_cleaner.py +++ b/PFERD/crawl/ilias/ilias_html_cleaner.py @@ -82,7 +82,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup: dummy.decompose() if len(children) > 1: continue - if type(children[0]) == Comment: + if isinstance(type(children[0]), Comment): dummy.decompose() for hrule_imposter in soup.find_all(class_="ilc_section_Separator"): diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index aed2069..46a8073 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -22,6 +22,7 @@ class IliasElementType(Enum): FOLDER = "folder" FORUM = "forum" LINK = "link" + LEARNING_MODULE = "learning_module" BOOKING = "booking" MEETING = "meeting" SURVEY = "survey" @@ -71,6 +72,14 @@ class IliasForumThread: mtime: Optional[datetime] +@dataclass +class IliasLearningModulePage: + title: str + content: Tag + next_url: Optional[str] + previous_url: Optional[str] + + class IliasPage: def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): @@ -136,6 +145,34 @@ class IliasPage: return BeautifulSoup(raw_html, "html.parser") + def get_learning_module_data(self) -> Optional[IliasLearningModulePage]: + if not self._is_learning_module_page(): + return None + content = self._soup.select_one("#ilLMPageContent") + title = self._soup.select_one(".ilc_page_title_PageTitle").getText().strip() + return IliasLearningModulePage( + title=title, + content=content, + next_url=self._find_learning_module_next(), + previous_url=self._find_learning_module_prev() + ) + + def _find_learning_module_next(self) -> Optional[str]: + for link in self._soup.select("a.ilc_page_rnavlink_RightNavigationLink"): + url = self._abs_url_from_link(link) + if "baseClass=ilLMPresentationGUI" not in url: + continue + return url + return None + + def 
_find_learning_module_prev(self) -> Optional[str]: + for link in self._soup.select("a.ilc_page_lnavlink_LeftNavigationLink"): + url = self._abs_url_from_link(link) + if "baseClass=ilLMPresentationGUI" not in url: + continue + return url + return None + def get_download_forum_data(self) -> Optional[IliasDownloadForumData]: form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x}) if not form: @@ -222,6 +259,12 @@ class IliasPage: return False return "target=copa_" in link.get("value") + def _is_learning_module_page(self) -> bool: + link = self._soup.find(id="current_perma_link") + if not link: + return False + return "target=pg_" in link.get("value") + def _contains_collapsed_future_meetings(self) -> bool: return self._uncollapse_future_meetings_url() is not None @@ -812,6 +855,9 @@ class IliasPage: if "cmdClass=ilobjtestgui" in parsed_url.query: return IliasElementType.TEST + if "baseClass=ilLMPresentationGUI" in parsed_url.query: + return IliasElementType.LEARNING_MODULE + # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so # try to guess it from the image. 
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index ae49edc..f82d684 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -1,8 +1,11 @@ import asyncio +import base64 +import os import re from collections.abc import Awaitable, Coroutine from pathlib import PurePath -from typing import Any, Callable, Dict, List, Optional, Set, Union, cast +from typing import Any, Callable, Dict, List, Literal, Optional, Set, Union, cast +from urllib.parse import urljoin import aiohttp import yarl @@ -16,10 +19,10 @@ from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection -from .file_templates import Links +from .file_templates import Links, learning_module_template from .ilias_html_cleaner import clean, insert_base_markup -from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement, - _sanitize_path_name, parse_ilias_forum_export) +from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage, + IliasPageElement, _sanitize_path_name, parse_ilias_forum_export) TargetType = Union[str, int] @@ -394,6 +397,8 @@ instance's greatest bottleneck. "[bright_black](surveys contain no relevant data)" ) return None + elif element.type == IliasElementType.LEARNING_MODULE: + return await self._handle_learning_module(element, element_path) elif element.type == IliasElementType.LINK: return await self._handle_link(element, element_path) elif element.type == IliasElementType.BOOKING: @@ -739,6 +744,135 @@ instance's greatest bottleneck. 
sink.file.write(content.encode("utf-8")) sink.done() + async def _handle_learning_module( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Coroutine[Any, Any, None]]: + maybe_cl = await self.crawl(element_path) + if not maybe_cl: + return None + return self._crawl_learning_module(element, maybe_cl) + + @_iorepeat(3, "crawling learning module") + @anoncritical + async def _crawl_learning_module(self, element: IliasPageElement, cl: CrawlToken) -> None: + elements: List[IliasLearningModulePage] = [] + + async with cl: + log.explain_topic(f"Parsing initial HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {element.url}") + soup = await self._get_page(element.url) + page = IliasPage(soup, element.url, None) + if next := page.get_learning_module_data(): + elements.extend(await self._crawl_learning_module_direction( + cl.path, next.previous_url, "left" + )) + elements.append(next) + elements.extend(await self._crawl_learning_module_direction( + cl.path, next.next_url, "right" + )) + + # Reflect their natural ordering in the file names + for index, lm_element in enumerate(elements): + lm_element.title = f"{index:02}_{lm_element.title}" + + tasks: List[Awaitable[None]] = [] + for index, elem in enumerate(elements): + prev_url = elements[index - 1].title if index > 0 else None + next_url = elements[index + 1].title if index < len(elements) - 1 else None + tasks.append(asyncio.create_task( + self._download_learning_module_page(cl.path, elem, prev_url, next_url) + )) + + # And execute them + await self.gather(tasks) + + async def _crawl_learning_module_direction( + self, + path: PurePath, + start_url: Optional[str], + dir: Union[Literal["left"], Literal["right"]] + ) -> List[IliasLearningModulePage]: + elements: List[IliasLearningModulePage] = [] + + if not start_url: + return elements + + next_element_url: Optional[str] = start_url + counter = 0 + while next_element_url: + log.explain_topic(f"Parsing HTML page for {fmt_path(path)} 
({dir}-{counter})") + log.explain(f"URL: {next_element_url}") + soup = await self._get_page(next_element_url) + page = IliasPage(soup, next_element_url, None) + if next := page.get_learning_module_data(): + elements.append(next) + if dir == "left": + next_element_url = next.previous_url + else: + next_element_url = next.next_url + counter += 1 + + return elements + + @anoncritical + @_iorepeat(3, "saving learning module page") + async def _download_learning_module_page( + self, + parent_path: PurePath, + element: IliasLearningModulePage, + prev: Optional[str], + next: Optional[str] + ) -> None: + path = parent_path / (_sanitize_path_name(element.title) + ".html") + maybe_dl = await self.download(path) + if not maybe_dl: + return + my_path = self._transformer.transform(maybe_dl.path) + if not my_path: + return + + if prev: + prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html")) + if prev_p: + prev = os.path.relpath(prev_p, my_path.parent) + else: + prev = None + if next: + next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html")) + if next_p: + next = os.path.relpath(next_p, my_path.parent) + else: + next = None + + async with maybe_dl as (bar, sink): + content = element.content + content = await self.internalize_images(content) + sink.file.write(learning_module_template(content, maybe_dl.path.name, prev, next).encode("utf-8")) + sink.done() + + async def internalize_images(self, tag: Tag) -> Tag: + """ + Tries to fetch ILIAS images and embed them as base64 data. 
+ """ + log.explain_topic("Internalizing images") + for elem in tag.find_all(recursive=True): + if not isinstance(elem, Tag): + continue + if elem.name == "img": + if src := elem.attrs.get("src", None): + url = urljoin(_ILIAS_URL, src) + if not url.startswith(_ILIAS_URL): + continue + log.explain(f"Internalizing {url!r}") + img = await self._get_authenticated(url) + elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode() + if elem.name == "iframe" and elem.attrs.get("src", "").startswith("//"): + # For unknown reasons the protocol seems to be stripped. + elem.attrs["src"] = "https:" + elem.attrs["src"] + return tag + async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup: auth_id = await self._current_auth_id() async with self.session.get(url) as request: @@ -772,7 +906,7 @@ instance's greatest bottleneck. self, url: str, data: dict[str, Union[str, List[str]]] - ) -> BeautifulSoup: + ) -> bytes: auth_id = await self._current_auth_id() form_data = aiohttp.FormData() @@ -792,6 +926,22 @@ instance's greatest bottleneck. return await request.read() raise CrawlError("post_authenticated failed even after authenticating") + async def _get_authenticated(self, url: str) -> bytes: + auth_id = await self._current_auth_id() + + async with self.session.get(url, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + + # We weren't authenticated, so try to do that + await self.authenticate(auth_id) + + # Retry once after authenticating. If this fails, we will die. + async with self.session.get(url, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + raise CrawlError("get_authenticated failed even after authenticating") + # We repeat this as the login method in shibboleth doesn't handle I/O errors. # Shibboleth is quite reliable as well, the repeat is likely not critical here. 
@ _iorepeat(3, "Login", failure_is_error=True) From dbc2553b119c39c7a8ad196c6858fc8109f746a9 Mon Sep 17 00:00:00 2001 From: "Mr. Pine" <50425705+Mr-Pine@users.noreply.github.com> Date: Wed, 15 Mar 2023 15:33:42 +0100 Subject: [PATCH 418/524] Add default `show-not-deleted` option If set to `no`, PFERD won't print status or report messages for not deleted files --- CHANGELOG.md | 3 +++ CONFIG.md | 8 ++++++-- PFERD/__main__.py | 4 ++++ PFERD/cli/parser.py | 7 +++++++ PFERD/config.py | 3 +++ PFERD/logging.py | 20 ++++++++++++++++++++ PFERD/output_dir.py | 2 +- PFERD/pferd.py | 2 +- 8 files changed, 45 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e3925c..85513d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,9 @@ ambiguous situations. ### Added - `no-delete-prompt-override` conflict resolution strategy - support for ILIAS learning modules +- `show_not_deleted` option to stop printing the "Not Deleted" status or report + message. This combines nicely with the `no-delete-prompt-override` strategy, + causing PFERD to mostly ignore local-only files. ## 3.4.3 - 2022-11-29 diff --git a/CONFIG.md b/CONFIG.md index 84ee885..5f62749 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -26,6 +26,9 @@ default values for the other sections. `Added ...`) while running a crawler. (Default: `yes`) - `report`: Whether PFERD should print a report of added, changed and deleted local files for all crawlers before exiting. (Default: `yes`) +- `show_not_deleted`: Whether PFERD should print messages in status and report + when a local-only file wasn't deleted. Combines nicely with the + `no-delete-prompt-override` conflict resolution strategy. - `share_cookies`: Whether crawlers should share cookies where applicable. For example, some crawlers share cookies if they crawl the same website using the same account. (Default: `yes`) @@ -75,8 +78,9 @@ common to all crawlers: using `prompt` and always choosing "yes". 
- `no-delete`: Never delete local files, but overwrite local files if the remote file is different. - - `no-delete-prompt-overwrite`: Never delete local files, but prompt to overwrite local files if the - remote file is different. + - `no-delete-prompt-overwrite`: Never delete local files, but prompt to + overwrite local files if the remote file is different. Combines nicely + with the `show_not_deleted` option. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) - `tasks`: The maximum number of concurrent tasks (such as crawling or diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 4faeb13..cb8c67c 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -47,6 +47,8 @@ def configure_logging_from_args(args: argparse.Namespace) -> None: log.output_explain = args.explain if args.status is not None: log.output_status = args.status + if args.show_not_deleted is not None: + log.output_not_deleted = args.show_not_deleted if args.report is not None: log.output_report = args.report @@ -72,6 +74,8 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N log.output_status = config.default_section.status() if args.report is None: log.output_report = config.default_section.report() + if args.show_not_deleted is None: + log.output_not_deleted = config.default_section.show_not_deleted() except ConfigOptionError as e: log.error(str(e)) sys.exit(1) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index e753023..be483fd 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -215,6 +215,11 @@ PARSER.add_argument( action=BooleanOptionalAction, help="whether crawlers should share cookies where applicable" ) +PARSER.add_argument( + "--show-not-deleted", + action=BooleanOptionalAction, + help="print messages in status and report when PFERD did not delete a local only file" +) def load_default_section( @@ -233,6 +238,8 @@ def 
load_default_section( section["report"] = "yes" if args.report else "no" if args.share_cookies is not None: section["share_cookies"] = "yes" if args.share_cookies else "no" + if args.show_not_deleted is not None: + section["show_not_deleted"] = "yes" if args.show_not_deleted else "no" SUBPARSERS = PARSER.add_subparsers(title="crawlers") diff --git a/PFERD/config.py b/PFERD/config.py index 8f7e682..b2cff4e 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -82,6 +82,9 @@ class DefaultSection(Section): def report(self) -> bool: return self.s.getboolean("report", fallback=True) + def show_not_deleted(self) -> bool: + return self.s.getboolean("show_not_deleted", fallback=True) + def share_cookies(self) -> bool: return self.s.getboolean("share_cookies", fallback=True) diff --git a/PFERD/logging.py b/PFERD/logging.py index 340b21f..b958fb2 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -59,6 +59,7 @@ class Log: # Whether different parts of the output are enabled or disabled self.output_explain = False self.output_status = True + self.output_not_deleted = True self.output_report = True def _update_live(self) -> None: @@ -207,6 +208,17 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new action = escape(f"{action:<{self.STATUS_WIDTH}}") self.print(f"{style}{action}[/] {escape(text)} {suffix}") + def not_deleted(self, style: str, action: str, text: str, suffix: str = "") -> None: + """ + Print a message for a local only file that wasn't + deleted while crawling. Allows markup in the "style" + argument which will be applied to the "action" string. + """ + + if self.output_status and self.output_not_deleted: + action = escape(f"{action:<{self.STATUS_WIDTH}}") + self.print(f"{style}{action}[/] {escape(text)} {suffix}") + def report(self, text: str) -> None: """ Print a report after crawling. Allows markup. 
@@ -215,6 +227,14 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_report: self.print(text) + def report_not_deleted(self, text: str) -> None: + """ + Print a report for a local only file that wasn't deleted after crawling. Allows markup. + """ + + if self.output_report and self.output_not_deleted: + self.print(text) + @contextmanager def _bar( self, diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 38d1288..e9e9b93 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -496,7 +496,7 @@ class OutputDirectory: except OSError: pass else: - log.status("[bold bright_magenta]", "Not deleted", fmt_path(pure)) + log.not_deleted("[bold bright_magenta]", "Not deleted", fmt_path(pure)) self._report.not_delete_file(pure) def load_prev_report(self) -> None: diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 079053b..b30a04a 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -180,7 +180,7 @@ class Pferd: log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}") for path in sorted(crawler.report.not_deleted_files): something_changed = True - log.report(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") + log.report_not_deleted(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") for warning in crawler.report.encountered_warnings: something_changed = True From b3d412360baeed6992535e6957d0bc1e368c337f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 26 Aug 2023 23:48:14 +0200 Subject: [PATCH 419/524] Add Nix flake --- flake.lock | 27 +++++++++++++++++++++++++++ flake.nix | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..914c58b --- /dev/null +++ b/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1692986144, + "narHash": "sha256-M4VFpy7Av9j+33HF5nIGm0k2+DXXW4qSSKdidIKg5jY=", + "owner": 
"NixOS", + "repo": "nixpkgs", + "rev": "74e5bdc5478ebbe7ba5849f0d765f92757bb9dbf", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-23.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..e3d52af --- /dev/null +++ b/flake.nix @@ -0,0 +1,41 @@ +{ + description = "Tool for downloading course-related files from ILIAS"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-23.05"; + }; + + outputs = { self, nixpkgs }: + let + # Helper function to generate an attrset '{ x86_64-linux = f "x86_64-linux"; ... }'. + forAllSystems = nixpkgs.lib.genAttrs nixpkgs.lib.systems.flakeExposed; + in + { + packages = forAllSystems (system: + let pkgs = import nixpkgs { inherit system; }; + in + rec { + default = pkgs.python3Packages.buildPythonApplication rec { + pname = "pferd"; + # Performing black magic + # Don't worry, I sacrificed enough goats for the next few years + version = (pkgs.lib.importTOML ./PFERD/version.py).VERSION; + format = "pyproject"; + + src = ./.; + + nativeBuildInputs = with pkgs.python3Packages; [ + setuptools + ]; + + propagatedBuildInputs = with pkgs.python3Packages; [ + aiohttp + beautifulsoup4 + rich + keyring + certifi + ]; + }; + }); + }; +} From 2184ac804018e836e439e365ae2b0d184adae26d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 26 Aug 2023 19:39:40 +0200 Subject: [PATCH 420/524] Add support for ILIAS mediacast listings --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 110 +++++++++++++++------ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 45 +++++---- 3 files changed, 107 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85513d2..d58ea18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ ambiguous situations. 
- `show_not_deleted` option to stop printing the "Not Deleted" status or report message. This combines nicely with the `no-delete-prompt-override` strategy, causing PFERD to mostly ignore local-only files. +- support for mediacast video listings ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 46a8073..d5ea76d 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from datetime import date, datetime, timedelta from enum import Enum -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, cast from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup, Tag @@ -26,10 +26,12 @@ class IliasElementType(Enum): BOOKING = "booking" MEETING = "meeting" SURVEY = "survey" - VIDEO = "video" - VIDEO_PLAYER = "video_player" - VIDEO_FOLDER = "video_folder" - VIDEO_FOLDER_MAYBE_PAGINATED = "video_folder_maybe_paginated" + MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder" + MEDIACAST_VIDEO = "mediacast_video" + OPENCAST_VIDEO = "opencast_video" + OPENCAST_VIDEO_PLAYER = "opencast_video_player" + OPENCAST_VIDEO_FOLDER = "opencast_video_folder" + OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED = "opencast_video_folder_maybe_paginated" @dataclass @@ -45,7 +47,8 @@ class IliasPageElement: r"eid=(?P[0-9a-z\-]+)", r"file_(?P\d+)", r"ref_id=(?P\d+)", - r"target=[a-z]+_(?P\d+)" + r"target=[a-z]+_(?P\d+)", + r"mm_(?P\d+)" ] for regex in regexes: @@ -105,9 +108,9 @@ class IliasPage: if self._is_video_player(): log.explain("Page is a video player, extracting URL") return self._player_to_video() - if self._is_video_listing(): - log.explain("Page is a video listing, searching for elements") - return self._find_video_entries() + if self._is_opencast_video_listing(): + log.explain("Page is an opencast video listing, searching for elements") + return self._find_opencast_video_entries() if 
self._is_exercise_file(): log.explain("Page is an exercise, searching for elements") return self._find_exercise_entries() @@ -199,9 +202,9 @@ class IliasPage: if self._is_ilias_opencast_embedding(): log.explain("Unwrapping opencast embedding") return self.get_child_elements()[0] - if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: + if self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED: log.explain("Unwrapping video pagination") - return self._find_video_entries_paginated()[0] + return self._find_opencast_video_entries_paginated()[0] if self._contains_collapsed_future_meetings(): log.explain("Requesting *all* future meetings") return self._uncollapse_future_meetings_url() @@ -219,7 +222,7 @@ class IliasPage: def _is_video_player(self) -> bool: return "paella_config_file" in str(self._soup) - def _is_video_listing(self) -> bool: + def _is_opencast_video_listing(self) -> bool: if self._is_ilias_opencast_embedding(): return True @@ -319,14 +322,14 @@ class IliasPage: # and just fetch the lone video url! if len(streams) == 1: video_url = streams[0]["sources"]["mp4"][0]["src"] - return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, self._source_name)] log.explain(f"Found multiple videos for stream at {self._source_name}") items = [] for stream in sorted(streams, key=lambda stream: stream["content"]): full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4" video_url = stream["sources"]["mp4"][0]["src"] - items.append(IliasPageElement(IliasElementType.VIDEO, video_url, full_name)) + items.append(IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, full_name)) return items @@ -385,7 +388,7 @@ class IliasPage: return items - def _find_video_entries(self) -> List[IliasPageElement]: + def _find_opencast_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages # 1. 
The initial dummy page without any videos. This page contains the link to the listing # 2. The video listing which might be paginated @@ -405,27 +408,27 @@ class IliasPage: query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} url = url_set_query_params(url, query_params) log.explain("Found ILIAS video frame page, fetching actual content next") - return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None - if is_paginated and not self._page_type == IliasElementType.VIDEO_FOLDER: + if is_paginated and not self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER: # We are in stage 2 - try to break pagination - return self._find_video_entries_paginated() + return self._find_opencast_video_entries_paginated() - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() - def _find_video_entries_paginated(self) -> List[IliasPageElement]: + def _find_opencast_video_entries_paginated(self) -> List[IliasPageElement]: table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) if table_element is None: log.warn("Couldn't increase elements per page (table not found). I might miss elements.") - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"]) if id_match is None: log.warn("Couldn't increase elements per page (table id not found). 
I might miss elements.") - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() table_id = id_match.group(1) @@ -434,9 +437,9 @@ class IliasPage: url = url_set_query_params(self._page_url, query_params) log.explain("Disabled pagination, retrying folder as a new entry") - return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER, url, "")] - def _find_video_entries_no_paging(self) -> List[IliasPageElement]: + def _find_opencast_video_entries_no_paging(self) -> List[IliasPageElement]: """ Crawls the "second stage" video page. This page contains the actual video urls. """ @@ -448,11 +451,11 @@ class IliasPage: results: List[IliasPageElement] = [] for link in video_links: - results.append(self._listed_video_to_element(link)) + results.append(self._listed_opencast_video_to_element(link)) return results - def _listed_video_to_element(self, link: Tag) -> IliasPageElement: + def _listed_opencast_video_to_element(self, link: Tag) -> IliasPageElement: # The link is part of a table with multiple columns, describing metadata. # 6th or 7th child (1 indexed) is the modification time string. 
Try to find it # by parsing backwards from the end and finding something that looks like a date @@ -479,7 +482,9 @@ class IliasPage: video_url = self._abs_url_from_link(link) log.explain(f"Found video {video_name!r} at {video_url}") - return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) + return IliasPageElement( + IliasElementType.OPENCAST_VIDEO_PLAYER, video_url, video_name, modification_time + ) def _find_exercise_entries(self) -> List[IliasPageElement]: if self._soup.find(id="tab_submission"): @@ -622,9 +627,48 @@ class IliasPage: result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) result += self._find_cards() + result += self._find_mediacast_videos() return result + def _find_mediacast_videos(self) -> List[IliasPageElement]: + videos: List[IliasPageElement] = [] + + for elem in cast(List[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")): + element_name = _sanitize_path_name( + elem.select_one(".ilPlayerPreviewDescription").getText().strip() + ) + if not element_name.endswith(".mp4"): + # just to make sure it has some kinda-alrightish ending + element_name = element_name + ".mp4" + video_element = elem.find(name="video") + if not video_element: + _unexpected_html_warning() + log.warn_contd(f"No