From 1fc8e9eb7ad99ad8c950c76398aab64b05c7d801 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 1 Jun 2021 10:00:59 +0000 Subject: [PATCH 001/224] Document credential file authenticator config options --- CONFIG.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 7826b04..feeade3 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -182,8 +182,11 @@ via the terminal. ### The `credential-file` authenticator -This authenticator reads a username and a password from a credential file. The -credential file has exactly two lines (trailing newline optional). The first +This authenticator reads a username and a password from a credential file. + +- `path`: Path to the credential file. (Required) + +The credential file has exactly two lines (trailing newline optional). The first line starts with `username=` and contains the username, the second line starts with `password=` and contains the password. The username and password may contain any characters except a line break. 
From 31b6311e993439b2bbb087511ca012e140003d9e Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 1 Jun 2021 19:02:55 +0200 Subject: [PATCH 002/224] Remove incorrect tmp file explain message --- PFERD/__main__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 5ae62bb..b274b6b 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -147,7 +147,6 @@ def main() -> None: log.unlock() log.explain_topic("Interrupted, exiting immediately") log.explain("Open files and connections are left for the OS to clean up") - log.explain("Temporary files are not cleaned up") pferd.print_report() # TODO Clean up tmp files # And when those files *do* actually get cleaned up properly, From fc31100a0f6e1933cf084e46898ad20d33d892b9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 4 Jun 2021 18:02:45 +0200 Subject: [PATCH 003/224] Always use '/' as path separator for regex rules Previously, regex-matching paths on windows would, in some cases, require four backslashes ('\\\\') to escape a single path separator. That's just too much. With this commit, regex transforms now use '/' instead of '\' as path separator, meaning rules can more easily be shared between platforms (although they are not guaranteed to be 100% compatible since on Windows, '\' is still recognized as a path separator). To make rules more intuitive to write, local relative paths are now also printed with '/' as path separator on Windows. Since Windows also accepts '/' as path separator, this change doesn't really affect other rules that parse their sides as paths. --- CHANGELOG.md | 3 +++ PFERD/transformer.py | 4 ++-- PFERD/utils.py | 8 +++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87c1d05..980f96e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Changed +- Use `/` instead of `\` as path separator for (regex) rules on Windows + ## 3.0.1 - 2021-06-01 ### Added diff --git a/PFERD/transformer.py b/PFERD/transformer.py index 83ffde4..ed123eb 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -10,7 +10,7 @@ from pathlib import PurePath from typing import Dict, Optional, Sequence, Union from .logging import log -from .utils import fmt_path +from .utils import fmt_path, str_path class Rule(ABC): @@ -116,7 +116,7 @@ class ReRule(Rule): self._right = right def transform(self, path: PurePath) -> Union[PurePath, bool]: - if match := re.fullmatch(self._left, str(path)): + if match := re.fullmatch(self._left, str_path(path)): if isinstance(self._right, bool): return self._right or path diff --git a/PFERD/utils.py b/PFERD/utils.py index 397feda..7c7b6f4 100644 --- a/PFERD/utils.py +++ b/PFERD/utils.py @@ -91,8 +91,14 @@ def url_set_query_params(url: str, params: Dict[str, str]) -> str: return result +def str_path(path: PurePath) -> str: + if not path.parts: + return "." + return "/".join(path.parts) + + def fmt_path(path: PurePath) -> str: - return repr(str(path)) + return repr(str_path(path)) def fmt_real_path(path: Path) -> str: From df3ad3d890e0c7e21fbb68305f3c1016f58c2523 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 4 Jun 2021 18:33:02 +0200 Subject: [PATCH 004/224] Add 'skip' option to crawlers --- CHANGELOG.md | 3 +++ CONFIG.md | 3 +++ PFERD/auth/authenticator.py | 6 +++++- PFERD/crawl/__init__.py | 2 +- PFERD/crawl/crawler.py | 9 +++++++++ PFERD/pferd.py | 39 +++++++++++++++++++++++++------------ 6 files changed, 48 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 980f96e..32cbe77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Added +- `skip` option for crawlers + ### Changed - Use `/` instead of `\` as path separator for (regex) rules on Windows diff --git a/CONFIG.md b/CONFIG.md index feeade3..2f18be1 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -49,6 +49,9 @@ see the type's [documentation](#crawler-types) below. The following options are common to all crawlers: - `type`: The available types are specified in [this section](#crawler-types). +- `skip`: Whether the crawler should be skipped during normal execution. The + crawler can still be executed manually using the `--crawler` or `-C` flags. + (Default: `no`) - `output_dir`: The directory the crawler synchronizes files to. A crawler will never place any files outside of this directory. (Default: the crawler's name) - `redownload`: When to download a file that is already present locally. diff --git a/PFERD/auth/authenticator.py b/PFERD/auth/authenticator.py index f588bc4..643a2d5 100644 --- a/PFERD/auth/authenticator.py +++ b/PFERD/auth/authenticator.py @@ -13,7 +13,11 @@ class AuthError(Exception): class AuthSection(Section): - pass + def type(self) -> str: + value = self.s.get("type") + if value is None: + self.missing_value("type") + return value class Authenticator(ABC): diff --git a/PFERD/crawl/__init__.py b/PFERD/crawl/__init__.py index 297c490..7eb2fb1 100644 --- a/PFERD/crawl/__init__.py +++ b/PFERD/crawl/__init__.py @@ -3,7 +3,7 @@ from typing import Callable, Dict from ..auth import Authenticator from ..config import Config -from .crawler import Crawler, CrawlError # noqa: F401 +from .crawler import Crawler, CrawlError, CrawlerSection # noqa: F401 from .ilias import KitIliasWebCrawler, KitIliasWebCrawlerSection from .local_crawler import LocalCrawler, LocalCrawlerSection diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index e990f16..d61783f 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -132,6 +132,15 @@ class DownloadToken(ReusableAsyncContextManager[Tuple[ProgressBar, 
FileSink]]): class CrawlerSection(Section): + def type(self) -> str: + value = self.s.get("type") + if value is None: + self.missing_value("type") + return value + + def skip(self) -> bool: + return self.s.getboolean("skip", fallback=False) + def output_dir(self, name: str) -> Path: # TODO Use removeprefix() after switching to 3.9 if name.startswith("crawl:"): diff --git a/PFERD/pferd.py b/PFERD/pferd.py index ac373cf..d98b426 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -3,9 +3,9 @@ from typing import Dict, List, Optional from rich.markup import escape -from .auth import AUTHENTICATORS, Authenticator, AuthError +from .auth import AUTHENTICATORS, Authenticator, AuthError, AuthSection from .config import Config, ConfigOptionError -from .crawl import CRAWLERS, Crawler, CrawlError, KitIliasWebCrawler +from .crawl import CRAWLERS, Crawler, CrawlError, CrawlerSection, KitIliasWebCrawler from .logging import log from .utils import fmt_path @@ -26,19 +26,22 @@ class Pferd: self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} - def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: - log.explain_topic("Deciding which crawlers to run") - crawl_sections = [name for name, _ in config.crawl_sections()] + def _find_config_crawlers(self, config: Config) -> List[str]: + crawl_sections = [] - if cli_crawlers is None: - log.explain("No crawlers specified on CLI") - log.explain("Running all crawlers specified in config") - return crawl_sections + for name, section in config.crawl_sections(): + if CrawlerSection(section).skip(): + log.explain(f"Skipping {name!r}") + else: + crawl_sections.append(name) + return crawl_sections + + def _find_cli_crawlers(self, config: Config, cli_crawlers: List[str]) -> List[str]: if len(cli_crawlers) != len(set(cli_crawlers)): raise PferdLoadError("Some crawlers were selected multiple times") - log.explain("Crawlers specified on CLI") + crawl_sections = [name for 
name, _ in config.crawl_sections()] crawlers_to_run = [] # With crawl: prefix unknown_names = [] # Without crawl: prefix @@ -62,10 +65,22 @@ class Pferd: return crawlers_to_run + def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: + log.explain_topic("Deciding which crawlers to run") + + if cli_crawlers is None: + log.explain("No crawlers specified on CLI") + log.explain("Running crawlers specified in config") + return self._find_config_crawlers(config) + else: + log.explain("Crawlers specified on CLI") + return self._find_cli_crawlers(config, cli_crawlers) + def _load_authenticators(self) -> None: for name, section in self._config.auth_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") - auth_type = section.get("type") + + auth_type = AuthSection(section).type() authenticator_constructor = AUTHENTICATORS.get(auth_type) if authenticator_constructor is None: raise ConfigOptionError(name, "type", f"Unknown authenticator type: {auth_type!r}") @@ -80,7 +95,7 @@ class Pferd: for name, section in self._config.crawl_sections(): log.print(f"[bold bright_cyan]Loading[/] {escape(name)}") - crawl_type = section.get("type") + crawl_type = CrawlerSection(section).type() crawler_constructor = CRAWLERS.get(crawl_type) if crawler_constructor is None: raise ConfigOptionError(name, "type", f"Unknown crawler type: {crawl_type!r}") From 8ab462fb87e8bdfac8bfd6821645dd9f4617e898 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 4 Jun 2021 19:23:33 +0200 Subject: [PATCH 005/224] Use the exercise label instead of the button name as path --- CHANGELOG.md | 2 ++ PFERD/crawl/ilias/kit_ilias_html.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32cbe77..171a61c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ ambiguous situations. 
### Changed - Use `/` instead of `\` as path separator for (regex) rules on Windows +- Use the label to the left for exercises instead of the button name to + determine the folder name ## 3.0.1 - 2021-06-01 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 64491f9..db9a303 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -293,7 +293,13 @@ class IliasPage: # Add each listing as a new for listing in file_listings: - file_name = _sanitize_path_name(listing.getText().strip()) + parent_container: Tag = listing.findParent( + "div", attrs={"class": lambda x: x and "form-group" in x} + ) + label_container: Tag = parent_container.find( + attrs={"class": lambda x: x and "control-label" in x} + ) + file_name = _sanitize_path_name(label_container.getText().strip()) url = self._abs_url_from_link(listing) log.explain(f"Found exercise detail {file_name!r} at {url}") results.append(IliasPageElement( From 61d902d7153f2942e24f92bd9e0a35e39be05563 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 9 Jun 2021 17:42:38 +0200 Subject: [PATCH 006/224] Overhaul transform logic -re-> arrows now rename their parent directories (like -->) and don't require a full match (like -exact->). Their old behaviour is available as -exact-re->. Also, this change adds the ">>" arrow head, which modifies the current path and continues to the next rule when it matches. --- CHANGELOG.md | 3 + PFERD/transformer.py | 540 +++++++++++++++++++++++-------------------- 2 files changed, 298 insertions(+), 245 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 171a61c..ffc6e81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,8 +24,11 @@ ambiguous situations. 
### Added - `skip` option for crawlers +- Rules with `>>` instead of `>` as arrow head +- `-exact-re->` arrow (behaves like `-re->` did previously) ### Changed +- The `-re->` arrow can now rename directories (like `-->`) - Use `/` instead of `\` as path separator for (regex) rules on Windows - Use the label to the left for exercises instead of the button name to determine the folder name diff --git a/PFERD/transformer.py b/PFERD/transformer.py index ed123eb..bf51d6a 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -1,151 +1,159 @@ -# I'm sorry that this code has become a bit dense and unreadable. While -# reading, it is important to remember what True and False mean. I'd love to -# have some proper sum-types for the inputs and outputs, they'd make this code -# a lot easier to understand. - import ast import re from abc import ABC, abstractmethod +from dataclasses import dataclass +from enum import Enum from pathlib import PurePath -from typing import Dict, Optional, Sequence, Union +from typing import Callable, Dict, List, Optional, Sequence, TypeVar, Union from .logging import log from .utils import fmt_path, str_path -class Rule(ABC): - @abstractmethod - def transform(self, path: PurePath) -> Union[PurePath, bool]: - """ - Try to apply this rule to the path. Returns another path if the rule - was successfully applied, True if the rule matched but resulted in an - exclamation mark, and False if the rule didn't match at all. 
- """ +class ArrowHead(Enum): + NORMAL = 0 + SEQUENCE = 1 + +class Ignore: + pass + + +class Empty: + pass + + +RightSide = Union[str, Ignore, Empty] + + +@dataclass +class Transformed: + path: PurePath + + +class Ignored: + pass + + +TransformResult = Optional[Union[Transformed, Ignored]] + + +@dataclass +class Rule: + left: str + name: str + head: ArrowHead + right: RightSide + + def right_result(self, path: PurePath) -> Union[str, Transformed, Ignored]: + if isinstance(self.right, str): + return self.right + elif isinstance(self.right, Ignore): + return Ignored() + elif isinstance(self.right, Empty): + return Transformed(path) + else: + raise RuntimeError(f"Right side has invalid type {type(self.right)}") + + +class Transformation(ABC): + def __init__(self, rule: Rule): + self.rule = rule + + @abstractmethod + def transform(self, path: PurePath) -> TransformResult: pass -# These rules all use a Union[T, bool] for their right side. They are passed a -# T if the arrow's right side was a normal string, True if it was an -# exclamation mark and False if it was missing entirely. 
- -class NormalRule(Rule): - def __init__(self, left: PurePath, right: Union[PurePath, bool]): - - self._left = left - self._right = right - - def _match_prefix(self, path: PurePath) -> Optional[PurePath]: - left_parts = list(reversed(self._left.parts)) - path_parts = list(reversed(path.parts)) - - if len(left_parts) > len(path_parts): +class ExactTf(Transformation): + def transform(self, path: PurePath) -> TransformResult: + if path != PurePath(self.rule.left): return None - while left_parts and path_parts: - left_part = left_parts.pop() - path_part = path_parts.pop() + right = self.rule.right_result(path) + if not isinstance(right, str): + return right - if left_part != path_part: - return None + return Transformed(PurePath(right)) - if left_parts: + +class ExactReTf(Transformation): + def transform(self, path: PurePath) -> TransformResult: + match = re.fullmatch(self.rule.left, str_path(path)) + if not match: return None - path_parts.reverse() - return PurePath(*path_parts) + right = self.rule.right_result(path) + if not isinstance(right, str): + return right - def transform(self, path: PurePath) -> Union[PurePath, bool]: - if rest := self._match_prefix(path): - if isinstance(self._right, bool): - return self._right or path + # For some reason, mypy thinks that "groups" has type List[str]. But + # since elements of "match.groups()" can be None, mypy is wrong. 
+ groups: Sequence[Optional[str]] = [match[0]] + list(match.groups()) + + locals_dir: Dict[str, Union[str, int, float]] = {} + for i, group in enumerate(groups): + if group is None: + continue + + locals_dir[f"g{i}"] = group + + try: + locals_dir[f"i{i}"] = int(group) + except ValueError: + pass + + try: + locals_dir[f"f{i}"] = float(group) + except ValueError: + pass + + result = eval(f"f{right!r}", {}, locals_dir) + return Transformed(PurePath(result)) + + +class RenamingParentsTf(Transformation): + def __init__(self, sub_tf: Transformation): + super().__init__(sub_tf.rule) + self.sub_tf = sub_tf + + def transform(self, path: PurePath) -> TransformResult: + for i in range(len(path.parts), -1, -1): + parent = PurePath(*path.parts[:i]) + child = PurePath(*path.parts[i:]) + + transformed = self.sub_tf.transform(parent) + if not transformed: + continue + elif isinstance(transformed, Transformed): + return Transformed(transformed.path / child) + elif isinstance(transformed, Ignored): + return transformed else: - return self._right / rest + raise RuntimeError(f"Invalid transform result of type {type(transformed)}: {transformed}") - return False + return None -class ExactRule(Rule): - def __init__(self, left: PurePath, right: Union[PurePath, bool]): - self._left = left - self._right = right +class RenamingPartsTf(Transformation): + def __init__(self, sub_tf: Transformation): + super().__init__(sub_tf.rule) + self.sub_tf = sub_tf - def transform(self, path: PurePath) -> Union[PurePath, bool]: - if path == self._left: - if isinstance(self._right, bool): - return self._right or path - else: - return self._right - - return False - - -class NameRule(Rule): - def __init__(self, subrule: Rule): - self._subrule = subrule - - def transform(self, path: PurePath) -> Union[PurePath, bool]: - matched = False + def transform(self, path: PurePath) -> TransformResult: result = PurePath() - for part in path.parts: - part_result = self._subrule.transform(PurePath(part)) - if 
isinstance(part_result, PurePath): - matched = True - result /= part_result - elif part_result: - # If any subrule call ignores its path segment, the entire path - # should be ignored - return True - else: - # The subrule doesn't modify this segment, but maybe other - # segments + transformed = self.sub_tf.transform(PurePath(part)) + if not transformed: result /= part + elif isinstance(transformed, Transformed): + result /= transformed.path + elif isinstance(transformed, Ignored): + return transformed + else: + raise RuntimeError(f"Invalid transform result of type {type(transformed)}: {transformed}") - if matched: - return result - else: - # The subrule has modified no segments, so this name version of it - # doesn't match - return False - - -class ReRule(Rule): - def __init__(self, left: str, right: Union[str, bool]): - self._left = left - self._right = right - - def transform(self, path: PurePath) -> Union[PurePath, bool]: - if match := re.fullmatch(self._left, str_path(path)): - if isinstance(self._right, bool): - return self._right or path - - vars: Dict[str, Union[str, int, float]] = {} - - # For some reason, mypy thinks that "groups" has type List[str]. - # But since elements of "match.groups()" can be None, mypy is - # wrong. 
- groups: Sequence[Optional[str]] = [match[0]] + list(match.groups()) - for i, group in enumerate(groups): - if group is None: - continue - - vars[f"g{i}"] = group - - try: - vars[f"i{i}"] = int(group) - except ValueError: - pass - - try: - vars[f"f{i}"] = float(group) - except ValueError: - pass - - result = eval(f"f{self._right!r}", vars) - return PurePath(result) - - return False + return None class RuleParseError(Exception): @@ -162,18 +170,15 @@ class RuleParseError(Exception): log.error_contd(f"{spaces}^--- {self.reason}") +T = TypeVar("T") + + class Line: def __init__(self, line: str, line_nr: int): self._line = line self._line_nr = line_nr self._index = 0 - def get(self) -> Optional[str]: - if self._index < len(self._line): - return self._line[self._index] - - return None - @property def line(self) -> str: return self._line @@ -190,155 +195,192 @@ class Line: def index(self, index: int) -> None: self._index = index - def advance(self) -> None: - self._index += 1 + @property + def rest(self) -> str: + return self.line[self.index:] - def expect(self, string: str) -> None: - for char in string: - if self.get() == char: - self.advance() - else: - raise RuleParseError(self, f"Expected {char!r}") + def peek(self, amount: int = 1) -> str: + return self.rest[:amount] + + def take(self, amount: int = 1) -> str: + string = self.peek(amount) + self.index += len(string) + return string + + def expect(self, string: str) -> str: + if self.peek(len(string)) == string: + return self.take(len(string)) + else: + raise RuleParseError(self, f"Expected {string!r}") + + def expect_with(self, string: str, value: T) -> T: + self.expect(string) + return value + + def one_of(self, parsers: List[Callable[[], T]], description: str) -> T: + for parser in parsers: + index = self.index + try: + return parser() + except RuleParseError: + self.index = index + + raise RuleParseError(self, description) + + +# RULE = LEFT SPACE '-' NAME '-' HEAD (SPACE RIGHT)? 
+# SPACE = ' '+ +# NAME = '' | 'exact' | 'name' | 're' | 'exact-re' | 'name-re' +# HEAD = '>' | '>>' +# LEFT = STR | QUOTED_STR +# RIGHT = STR | QUOTED_STR | '!' + + +def parse_zero_or_more_spaces(line: Line) -> None: + while line.peek() == " ": + line.take() + + +def parse_one_or_more_spaces(line: Line) -> None: + line.expect(" ") + parse_zero_or_more_spaces(line) + + +def parse_str(line: Line) -> str: + result = [] + while c := line.peek(): + if c == " ": + break + else: + line.take() + result.append(c) + + if result: + return "".join(result) + else: + raise RuleParseError(line, "Expected non-space character") QUOTATION_MARKS = {'"', "'"} -def parse_string_literal(line: Line) -> str: +def parse_quoted_str(line: Line) -> str: escaped = False # Points to first character of string literal start_index = line.index - quotation_mark = line.get() + quotation_mark = line.peek() if quotation_mark not in QUOTATION_MARKS: - # This should never happen as long as this function is only called from - # parse_string. 
- raise RuleParseError(line, "Invalid quotation mark") - line.advance() + raise RuleParseError(line, "Expected quotation mark") + line.take() - while c := line.get(): + while c := line.peek(): if escaped: escaped = False - line.advance() + line.take() elif c == quotation_mark: - line.advance() + line.take() stop_index = line.index literal = line.line[start_index:stop_index] - return ast.literal_eval(literal) + try: + return ast.literal_eval(literal) + except SyntaxError as e: + line.index = start_index + raise RuleParseError(line, str(e)) from e elif c == "\\": escaped = True - line.advance() + line.take() else: - line.advance() + line.take() raise RuleParseError(line, "Expected end of string literal") -def parse_until_space_or_eol(line: Line) -> str: - result = [] - while c := line.get(): - if c == " ": - break - result.append(c) - line.advance() - - return "".join(result) - - -def parse_string(line: Line) -> Union[str, bool]: - if line.get() in QUOTATION_MARKS: - return parse_string_literal(line) +def parse_left(line: Line) -> str: + if line.peek() in QUOTATION_MARKS: + return parse_quoted_str(line) else: - string = parse_until_space_or_eol(line) + return parse_str(line) + + +def parse_right(line: Line) -> Union[str, Ignore]: + c = line.peek() + if c in QUOTATION_MARKS: + return parse_quoted_str(line) + else: + string = parse_str(line) if string == "!": - return True + return Ignore() return string -def parse_arrow(line: Line) -> str: - line.expect("-") - - name = [] - while True: - c = line.get() - if not c: - raise RuleParseError(line, "Expected rest of arrow") - elif c == "-": - line.advance() - c = line.get() - if not c: - raise RuleParseError(line, "Expected rest of arrow") - elif c == ">": - line.advance() - break # End of arrow - else: - name.append("-") - continue - else: - name.append(c) - - line.advance() - - return "".join(name) +def parse_arrow_name(line: Line) -> str: + return line.one_of([ + lambda: line.expect("exact-re"), + lambda: 
line.expect("exact"), + lambda: line.expect("name-re"), + lambda: line.expect("name"), + lambda: line.expect("re"), + lambda: line.expect(""), + ], "Expected arrow name") -def parse_whitespace(line: Line) -> None: - line.expect(" ") - while line.get() == " ": - line.advance() +def parse_arrow_head(line: Line) -> ArrowHead: + return line.one_of([ + lambda: line.expect_with(">>", ArrowHead.SEQUENCE), + lambda: line.expect_with(">", ArrowHead.NORMAL), + ], "Expected arrow head") def parse_eol(line: Line) -> None: - if line.get() is not None: + if line.peek(): raise RuleParseError(line, "Expected end of line") def parse_rule(line: Line) -> Rule: - # Parse left side - leftindex = line.index - left = parse_string(line) - if isinstance(left, bool): - line.index = leftindex - raise RuleParseError(line, "Left side can't be '!'") - leftpath = PurePath(left) + parse_zero_or_more_spaces(line) + left = parse_left(line) - # Parse arrow - parse_whitespace(line) - arrowindex = line.index - arrowname = parse_arrow(line) + parse_one_or_more_spaces(line) - # Parse right side - if line.get(): - parse_whitespace(line) - right = parse_string(line) + line.expect("-") + name = parse_arrow_name(line) + line.expect("-") + head = parse_arrow_head(line) + + index = line.index + right: RightSide + try: + parse_zero_or_more_spaces(line) + parse_eol(line) + right = Empty() + except RuleParseError: + line.index = index + parse_one_or_more_spaces(line) + right = parse_right(line) + parse_eol(line) + + return Rule(left, name, head, right) + + +def parse_transformation(line: Line) -> Transformation: + rule = parse_rule(line) + + if rule.name == "": + return RenamingParentsTf(ExactTf(rule)) + elif rule.name == "exact": + return ExactTf(rule) + elif rule.name == "name": + return RenamingPartsTf(ExactTf(rule)) + elif rule.name == "re": + return RenamingParentsTf(ExactReTf(rule)) + elif rule.name == "exact-re": + return ExactReTf(rule) + elif rule.name == "name-re": + return 
RenamingPartsTf(ExactReTf(rule)) else: - right = False - rightpath: Union[PurePath, bool] - if isinstance(right, bool): - rightpath = right - else: - rightpath = PurePath(right) - - parse_eol(line) - - # Dispatch - if arrowname == "": - return NormalRule(leftpath, rightpath) - elif arrowname == "name": - if len(leftpath.parts) > 1: - line.index = leftindex - raise RuleParseError(line, "SOURCE must be a single name, not multiple segments") - return NameRule(ExactRule(leftpath, rightpath)) - elif arrowname == "exact": - return ExactRule(leftpath, rightpath) - elif arrowname == "re": - return ReRule(left, right) - elif arrowname == "name-re": - return NameRule(ReRule(left, right)) - else: - line.index = arrowindex + 1 # For nicer error message - raise RuleParseError(line, f"Invalid arrow name {arrowname!r}") + raise RuntimeError(f"Invalid arrow name {rule.name!r}") class Transformer: @@ -347,32 +389,40 @@ class Transformer: May throw a RuleParseException. """ - self._rules = [] + self._tfs = [] for i, line in enumerate(rules.split("\n")): line = line.strip() if line: - rule = parse_rule(Line(line, i)) - self._rules.append((line, rule)) + tf = parse_transformation(Line(line, i)) + self._tfs.append((line, tf)) def transform(self, path: PurePath) -> Optional[PurePath]: - for i, (line, rule) in enumerate(self._rules): + for i, (line, tf) in enumerate(self._tfs): log.explain(f"Testing rule {i+1}: {line}") try: - result = rule.transform(path) + result = tf.transform(path) except Exception as e: log.warn(f"Error while testing rule {i+1}: {line}") log.warn_contd(str(e)) continue - if isinstance(result, PurePath): - log.explain(f"Match found, transformed path to {fmt_path(result)}") - return result - elif result: # Exclamation mark - log.explain("Match found, path ignored") - return None - else: + if not result: continue - log.explain("No rule matched, path is unchanged") + if isinstance(result, Ignored): + log.explain("Match found, path ignored") + return None + + if 
tf.rule.head == ArrowHead.NORMAL: + log.explain(f"Match found, transformed path to {fmt_path(result.path)}") + path = result.path + break + elif tf.rule.head == ArrowHead.SEQUENCE: + log.explain(f"Match found, updated path to {fmt_path(result.path)}") + path = result.path + else: + raise RuntimeError(f"Invalid transform result of type {type(result)}: {result}") + + log.explain(f"Final result: {fmt_path(path)}") return path From f28bbe6b0c11c165ad604b6ab33730a37800604a Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 9 Jun 2021 22:22:40 +0200 Subject: [PATCH 007/224] Update transform rule documentation It's still missing an example that uses rules with ">>" arrows. --- CONFIG.md | 128 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 40 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 2f18be1..1793ddc 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -222,56 +222,87 @@ This authenticator does not support usernames. Transformation rules are rules for renaming and excluding files and directories. They are specified line-by-line in a crawler's `transform` option. When a crawler needs to apply a rule to a path, it goes through this list top-to-bottom -and choose the first matching rule. +and applies the first matching rule. To see this process in action, you can use the `--debug-transforms` or flag or the `--explain` flag. -Each line has the format `SOURCE ARROW TARGET` where `TARGET` is optional. -`SOURCE` is either a normal path without spaces (e. g. `foo/bar`), or a string -literal delimited by `"` or `'` (e. g. `"foo\" bar/baz"`). Python's string -escape syntax is supported. Trailing slashes are ignored. `TARGET` can be -formatted like `SOURCE`, but it can also be a single exclamation mark without -quotes (`!`). `ARROW` is one of `-->`, `-name->`, `-exact->`, `-re->` and -`-name-re->` +Each rule has the format `SOURCE ARROW TARGET` (e. g. `foo/bar --> foo/baz`). +The arrow specifies how the source and target are interpreted. 
The different +kinds of arrows are documented below. -If a rule's target is `!`, this means that when the rule matches on a path, the -corresponding file or directory is ignored. If a rule's target is missing, the -path is matched but not modified. +`SOURCE` and `TARGET` are either a bunch of characters without spaces (e. g. +`foo/bar`) or string literals (e. g, `"foo/b a r"`). The former syntax has no +concept of escaping characters, so the backslash is just another character. The +string literals however support Python's escape syntax (e. g. +`"foo\\bar\tbaz"`). This also means that in string literals, backslashes must be +escaped. + +`TARGET` can additionally be a single exclamation mark `!` (*not* `"!"`). When a +rule with a `!` as target matches a path, the corresponding file or directory is +ignored by the crawler instead of renamed. + +`TARGET` can also be omitted entirely. When a rule without target matches a +path, the path is returned unmodified. This is useful to prevent rules further +down from matching instead. + +Each arrow's behaviour can be modified slightly by changing the arrow's head +from `>` to `>>`. When a rule with a `>>` arrow head matches a path, it doesn't +return immediately like a normal arrow. Instead, it replaces the current path +with its output and continues on to the next rule. In effect, this means that +multiple rules can be applied sequentially. ### The `-->` arrow -The `-->` arrow is a basic renaming operation. If a path begins with `SOURCE`, -that part of the path is replaced with `TARGET`. This means that the rule -`foo/bar --> baz` would convert `foo/bar` into `baz`, but also `foo/bar/xyz` -into `baz/xyz`. The rule `foo --> !` would ignore a directory named `foo` as -well as all its contents. +The `-->` arrow is a basic renaming operation for files and directories. If a +path matches `SOURCE`, it is renamed to `TARGET`. 
+ +Example: `foo/bar --> baz` +- Doesn't match `foo`, `a/foo/bar` or `foo/baz` +- Converts `foo/bar` into `baz` +- Converts `foo/bar/wargl` into `bar/wargl` + +Example: `foo/bar --> !` +- Doesn't match `foo`, `a/foo/bar` or `foo/baz` +- Ignores `foo/bar` and any of its children ### The `-name->` arrow The `-name->` arrow lets you rename files and directories by their name, regardless of where they appear in the file tree. Because of this, its `SOURCE` must not contain multiple path segments, only a single name. This restriction -does not apply to its `TARGET`. The `-name->` arrow is not applied recursively -to its own output to prevent infinite loops. +does not apply to its `TARGET`. -For example, the rule `foo -name-> bar/baz` would convert `a/foo` into -`a/bar/baz` and `a/foo/b/c/foo` into `a/bar/baz/b/c/bar/baz`. The rule `foo --name-> !` would ignore all directories and files named `foo`. +Example: `foo -name-> bar/baz` +- Doesn't match `a/foobar/b` or `x/Foo/y/z` +- Converts `hello/foo` into `hello/bar/baz` +- Converts `foo/world` into `bar/baz/world` +- Converts `a/foo/b/c/foo` into `a/bar/baz/b/c/bar/baz` + +Example: `foo -name-> !` +- Doesn't match `a/foobar/b` or `x/Foo/y/z` +- Ignores any path containing a segment `foo` ### The `-exact->` arrow -The `-exact->` arrow requires the path to match `SOURCE` exactly. This means -that the rule `foo/bar -exact-> baz` would still convert `foo/bar` into `baz`, -but `foo/bar/xyz` would be unaffected. Also, `foo -exact-> !` would only ignore -`foo`, but not its contents (if it has any). The examples below show why this is -useful. +The `-exact->` arrow requires the path to match `SOURCE` exactly. The examples +below show why this is useful. 
+ +Example: `foo/bar -exact-> baz` +- Doesn't match `foo`, `a/foo/bar` or `foo/baz` +- Converts `foo/bar` into `baz` +- Doesn't match `foo/bar/wargl` + +Example: `foo/bar -exact-> !` +- Doesn't match `foo`, `a/foo/bar` or `foo/baz` +- Ignores only `foo/bar`, not its children ### The `-re->` arrow -The `-re->` arrow uses regular expressions. `SOURCE` is a regular expression -that must match the entire path. If this is the case, then the capturing groups -are available in `TARGET` for formatting. +The `-re->` arrow is like the `-->` arrow but with regular expressions. `SOURCE` +is a regular expression and `TARGET` an f-string based template. If a path +matches `SOURCE`, the output path is created using `TARGET` as template. +`SOURCE` is automatically anchored. `TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can be referred to as `{g}` (e. g. `{g3}`). `{g0}` refers to the original path. @@ -288,18 +319,36 @@ can use `{i3:05}`. PFERD even allows you to write entire expressions inside the curly braces, for example `{g2.lower()}` or `{g3.replace(' ', '_')}`. +Example: `f(oo+)/be?ar -re-> B{g1.upper()}H/fear` +- Doesn't match `a/foo/bar`, `foo/abc/bar`, `afoo/bar` or `foo/bars` +- Converts `foo/bar` into `BOOH/fear` +- Converts `fooooo/bear` into `BOOOOOH/fear` +- Converts `foo/bar/baz` into `BOOH/fear/baz` + [3]: "Format String Syntax" ### The `-name-re->` arrow The `-name-re>` arrow is like a combination of the `-name->` and `-re->` arrows. -Instead of the `SOURCE` being the name of a directory or file, it's a regex that -is matched against the names of directories and files. `TARGET` works like the -`-re->` arrow's target. -For example, the arrow `(.*)\.jpeg -name-re-> {g1}.jpg` will rename all `.jpeg` -extensions into `.jpg`. The arrow `\..+ -name-re-> !` will ignore all files and -directories starting with `.`. 
+Example: `(.*)\.jpeg -name-re-> {g1}.jpg` +- Doesn't match `foo/bar.png`, `baz.JPEG` or `hello,jpeg` +- Converts `foo/bar.jpeg` into `foo/bar.jpg` +- Converts `foo.jpeg/bar/baz.jpeg` into `foo.jpg/bar/baz.jpg` + +Example: `\..+ -name-re-> !` +- Doesn't match `.`, `test`, `a.b` +- Ignores all files and directories starting with `.`. + +### The `-exact-re->` arrow + +The `-exact-re>` arrow is like a combination of the `-exact->` and `-re->` arrows. + +Example: `f(oo+)/be?ar -exactre-> B{g1.upper()}H/fear` +- Doesn't match `a/foo/bar`, `foo/abc/bar`, `afoo/bar` or `foo/bars` +- Converts `foo/bar` into `BOOH/fear` +- Converts `fooooo/bear` into `BOOOOOH/fear` +- Doesn't match `foo/bar/baz` ### Example: Tutorials @@ -327,7 +376,7 @@ The second rule is required for many crawlers since they use the rules to decide which directories to crawl. If it was missing when the crawler looks at `tutorials/`, the third rule would match. This means the crawler would not crawl the `tutorials/` directory and thus not discover that `tutorials/tut02/` -existed. +exists. Since the second rule is only relevant for crawling, the `TARGET` is left out. @@ -352,9 +401,9 @@ To do this, you can use the most powerful of arrows: The regex arrow. Note the escaped backslashes on the `SOURCE` side. -### Example: Crawl a python project +### Example: Crawl a Python project -You are crawling a python project and want to ignore all hidden files (files +You are crawling a Python project and want to ignore all hidden files (files whose name starts with a `.`), all `__pycache__` directories and all markdown files (for some weird reason). @@ -374,8 +423,7 @@ README.md ... ``` -For this task, the name arrows can be used. They are variants of the normal -arrows that only look at the file name instead of the entire path. +For this task, the name arrows can be used. ``` \..* -name-re-> ! 
From bc65ea7ab696bf3f455c49bad4ae4375a75182a8 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 9 Jun 2021 22:35:55 +0200 Subject: [PATCH 008/224] Fix mypy complaining about missing type hints --- scripts/setup | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/setup b/scripts/setup index b48fb1a..f6680bb 100755 --- a/scripts/setup +++ b/scripts/setup @@ -12,6 +12,6 @@ pip install --upgrade setuptools # Installing PFERD itself pip install --editable . -# Installing various tools -pip install --upgrade mypy flake8 autopep8 isort -pip install --upgrade pyinstaller +# Installing tools and type hints +pip install --upgrade mypy flake8 autopep8 isort pyinstaller +pip install --upgrade types-chardet types-certifi From a292c4c437d631d7eae3a0adfd98adbefd52c2eb Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 12 Jun 2021 14:57:29 +0200 Subject: [PATCH 009/224] Add example for ">>" arrow heads --- CONFIG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CONFIG.md b/CONFIG.md index 1793ddc..f2710e1 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -430,3 +430,14 @@ For this task, the name arrows can be used. __pycache__ -name-> ! .*\.md -name-re-> ! ``` + +### Example: Clean up names + +You want to convert all paths into lowercase and replace spaces with underscores +before applying any rules. This can be achieved using the `>>` arrow heads. + +``` +(.*) -re->> "{g1.lower().replace(' ', '_')}" + + +``` From 601e4b936b320e766c0de18d384a92a5750f72b9 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 12 Jun 2021 15:00:52 +0200 Subject: [PATCH 010/224] Use new arrow logic in README example config --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d25e86f..681bdf7 100644 --- a/README.md +++ b/README.md @@ -116,17 +116,18 @@ transform = Online-Tests --> ! Vorlesungswerbung --> ! 
+ # Rename folders + Lehrbücher --> Vorlesung + # Note the ">>" arrow head which lets us apply further rules to files moved to "Übung" + Übungsunterlagen -->> Übung + # Move exercises to own folder. Rename them to "Blatt-XX.pdf" to make them sort properly - "Übungsunterlagen/(\d+). Übungsblatt.pdf" -re-> Blätter/Blatt-{i1:02}.pdf + "Übung/(\d+). Übungsblatt.pdf" -re-> Blätter/Blatt-{i1:02}.pdf # Move solutions to own folder. Rename them to "Blatt-XX-Lösung.pdf" to make them sort properly - "Übungsunterlagen/(\d+). Übungsblatt.*Musterlösung.pdf" -re-> Blätter/Blatt-{i1:02}-Lösung.pdf + "Übung/(\d+). Übungsblatt.*Musterlösung.pdf" -re-> Blätter/Blatt-{i1:02}-Lösung.pdf # The course has nested folders with the same name - flatten them - "Übungsunterlagen/(.+?)/\\1/(.*)" -re-> Übung/{g1}/{g2} - - # Rename remaining folders - Übungsunterlagen --> Übung - Lehrbücher --> Vorlesung + "Übung/(.+?)/\\1" -re-> Übung/{g1} [crawl:Bar] type = kit-ilias-web From 70b33ecfd9ca3230303cc17f39fd8bc634737e2b Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 13 Jun 2021 15:06:50 +0200 Subject: [PATCH 011/224] Add migration notes to changelog Also clean up some other formatting for consistency --- CHANGELOG.md | 5 +++++ CONFIG.md | 6 +++--- README.md | 6 +++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffc6e81..d6049d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,11 @@ ambiguous situations. ## Unreleased +If your config file doesn't do weird things with transforms, it should continue +to work. If your `-re->` arrows behave weirdly, try replacing them with +`-exact-re->` arrows. If you're on Windows, you might need to switch from `\` +path separators to `/` in your regex rules. 
+ ### Added - `skip` option for crawlers - Rules with `>>` instead of `>` as arrow head diff --git a/CONFIG.md b/CONFIG.md index f2710e1..19afbd2 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -342,7 +342,8 @@ Example: `\..+ -name-re-> !` ### The `-exact-re->` arrow -The `-exact-re>` arrow is like a combination of the `-exact->` and `-re->` arrows. +The `-exact-re>` arrow is like a combination of the `-exact->` and `-re->` +arrows. Example: `f(oo+)/be?ar -exactre-> B{g1.upper()}H/fear` - Doesn't match `a/foo/bar`, `foo/abc/bar`, `afoo/bar` or `foo/bars` @@ -375,8 +376,7 @@ tutorials --> ! The second rule is required for many crawlers since they use the rules to decide which directories to crawl. If it was missing when the crawler looks at `tutorials/`, the third rule would match. This means the crawler would not crawl -the `tutorials/` directory and thus not discover that `tutorials/tut02/` -exists. +the `tutorials/` directory and thus not discover that `tutorials/tut02/` exists. Since the second rule is only relevant for crawling, the `TARGET` is left out. diff --git a/README.md b/README.md index 681bdf7..836147f 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,9 @@ The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. ## Basic usage -PFERD can be run directly from the command line with no config file. -Run `pferd -h` to get an overview of available commands and options. -Run `pferd -h` to see which options a command has. +PFERD can be run directly from the command line with no config file. Run `pferd +-h` to get an overview of available commands and options. Run `pferd +-h` to see which options a command has. 
For example, you can download your personal desktop from the KIT ILIAS like this: From 70ec64a48ba8a56a819dfdbacba974f108d1206e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 13 Jun 2021 15:39:22 +0200 Subject: [PATCH 012/224] Fix wrong base URL for multi-stage pages --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 2 +- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6049d2..c09f921 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,9 @@ path separators to `/` in your regex rules. - Use the label to the left for exercises instead of the button name to determine the folder name +### Fixed +- Video pagination handling in ILIAS crawler + ## 3.0.1 - 2021-06-01 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index db9a303..384f0de 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -480,7 +480,7 @@ class IliasPage: return None if "opencast" in str(img_tag["alt"]).lower(): - return IliasElementType.VIDEO_FOLDER + return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED if str(img_tag["src"]).endswith("icon_exc.svg"): return IliasElementType.EXERCISE diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 78428e0..6495da9 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -253,7 +253,7 @@ instance's greatest bottleneck. 
soup = await self._get_page(next_stage_url) log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") log.explain(f"URL: {next_stage_url}") - page = IliasPage(soup, url, parent) + page = IliasPage(soup, next_stage_url, parent) next_stage_url = page.get_next_stage_url() elements.extend(page.get_child_elements()) From 57aef262179f72795e30f1c93254a32f084c0e23 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 13 Jun 2021 16:32:22 +0200 Subject: [PATCH 013/224] Fix name arrows I seem to have (re-)implemented them incorrectly and never tested them. --- PFERD/transformer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index bf51d6a..a37443a 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -142,18 +142,23 @@ class RenamingPartsTf(Transformation): def transform(self, path: PurePath) -> TransformResult: result = PurePath() + any_part_matched = False for part in path.parts: transformed = self.sub_tf.transform(PurePath(part)) if not transformed: result /= part elif isinstance(transformed, Transformed): result /= transformed.path + any_part_matched = True elif isinstance(transformed, Ignored): return transformed else: raise RuntimeError(f"Invalid transform result of type {type(transformed)}: {transformed}") - return None + if any_part_matched: + return Transformed(result) + else: + return None class RuleParseError(Exception): From 6e4d423c812c52aff95249ad992dc4889d971208 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 13 Jun 2021 16:50:29 +0200 Subject: [PATCH 014/224] Crawl all video stages in one crawl bar This ensures folders are not renamed, as they are crawled twice --- PFERD/crawl/ilias/kit_ilias_html.py | 6 ++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 9 +++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 384f0de..41f45e2 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py 
+++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -62,9 +62,11 @@ class IliasPage: log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() - def get_next_stage_url(self) -> Optional[str]: + def get_next_stage_element(self) -> Optional[IliasPageElement]: if self._is_ilias_opencast_embedding(): - return self.get_child_elements()[0].url + return self.get_child_elements()[0] + if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: + return self._find_video_entries_paginated()[0] return None def _is_video_player(self) -> bool: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 6495da9..41c301c 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -248,13 +248,18 @@ instance's greatest bottleneck. elements.clear() async with cl: next_stage_url: Optional[str] = url + current_parent = parent while next_stage_url: soup = await self._get_page(next_stage_url) log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") log.explain(f"URL: {next_stage_url}") - page = IliasPage(soup, next_stage_url, parent) - next_stage_url = page.get_next_stage_url() + page = IliasPage(soup, next_stage_url, current_parent) + if next_element := page.get_next_stage_element(): + current_parent = next_element + next_stage_url = next_element.url + else: + next_stage_url = None elements.extend(page.get_child_elements()) From 75fde870c2cc4b0f8b87c80cae87e61f9379ddd2 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 13 Jun 2021 17:23:18 +0200 Subject: [PATCH 015/224] Bump version to 3.1.0 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c09f921..427219e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. 
## Unreleased +## 3.1.0 - 2021-06-13 + If your config file doesn't do weird things with transforms, it should continue to work. If your `-re->` arrows behave weirdly, try replacing them with `-exact-re->` arrows. If you're on Windows, you might need to switch from `\` diff --git a/PFERD/version.py b/PFERD/version.py index 2aae99d..8ce7ae4 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.0.1" +VERSION = "3.1.0" From 80eeb8fe97e28437dcce0e148ffba202fde6a156 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 1 Jul 2021 11:01:55 +0200 Subject: [PATCH 016/224] Add --skip option --- PFERD/__main__.py | 2 +- PFERD/cli/parser.py | 8 ++++++++ PFERD/pferd.py | 24 +++++++++++++++++++----- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index b274b6b..b665feb 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -116,7 +116,7 @@ def main() -> None: sys.exit() try: - pferd = Pferd(config, args.crawler) + pferd = Pferd(config, args.crawler, args.skip) except PferdLoadError as e: log.unlock() log.error(str(e)) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index f5fb215..e753023 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -181,6 +181,14 @@ PARSER.add_argument( help="only execute a single crawler." " Can be specified multiple times to execute multiple crawlers" ) +PARSER.add_argument( + "--skip", "-S", + action="append", + type=str, + metavar="NAME", + help="don't execute this particular crawler." 
+ " Can be specified multiple times to skip multiple crawlers" +) PARSER.add_argument( "--working-dir", type=Path, diff --git a/PFERD/pferd.py b/PFERD/pferd.py index d98b426..726ed45 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -15,13 +15,13 @@ class PferdLoadError(Exception): class Pferd: - def __init__(self, config: Config, cli_crawlers: Optional[List[str]]): + def __init__(self, config: Config, cli_crawlers: Optional[List[str]], cli_skips: Optional[List[str]]): """ May throw PferdLoadError. """ self._config = config - self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers) + self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers, cli_skips) self._authenticators: Dict[str, Authenticator] = {} self._crawlers: Dict[str, Crawler] = {} @@ -65,16 +65,30 @@ class Pferd: return crawlers_to_run - def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]: + def _find_crawlers_to_run( + self, + config: Config, + cli_crawlers: Optional[List[str]], + cli_skips: Optional[List[str]], + ) -> List[str]: log.explain_topic("Deciding which crawlers to run") + crawlers: List[str] if cli_crawlers is None: log.explain("No crawlers specified on CLI") log.explain("Running crawlers specified in config") - return self._find_config_crawlers(config) + crawlers = self._find_config_crawlers(config) else: log.explain("Crawlers specified on CLI") - return self._find_cli_crawlers(config, cli_crawlers) + crawlers = self._find_cli_crawlers(config, cli_crawlers) + + skips = {f"crawl:{name}" for name in cli_skips} if cli_skips else set() + for crawler in crawlers: + if crawler in skips: + log.explain(f"Skipping crawler {crawler!r}") + crawlers = [crawler for crawler in crawlers if crawler not in skips] + + return crawlers def _load_authenticators(self) -> None: for name, section in self._config.auth_sections(): From 9ffd6033575ed0ed603663e60bd00b8adb5b8295 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 1 Jul 2021 
11:14:50 +0200 Subject: [PATCH 017/224] Error when using multiple segments with -name-> Previously, PFERD just silently never matched the -name-> arrow. Now, it errors when loading the config file. --- PFERD/transformer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/PFERD/transformer.py b/PFERD/transformer.py index a37443a..1a56e27 100644 --- a/PFERD/transformer.py +++ b/PFERD/transformer.py @@ -41,9 +41,11 @@ TransformResult = Optional[Union[Transformed, Ignored]] @dataclass class Rule: left: str + left_index: int name: str head: ArrowHead right: RightSide + right_index: int def right_result(self, path: PurePath) -> Union[str, Transformed, Ignored]: if isinstance(self.right, str): @@ -345,6 +347,7 @@ def parse_eol(line: Line) -> None: def parse_rule(line: Line) -> Rule: parse_zero_or_more_spaces(line) + left_index = line.index left = parse_left(line) parse_one_or_more_spaces(line) @@ -354,19 +357,19 @@ def parse_rule(line: Line) -> Rule: line.expect("-") head = parse_arrow_head(line) - index = line.index + right_index = line.index right: RightSide try: parse_zero_or_more_spaces(line) parse_eol(line) right = Empty() except RuleParseError: - line.index = index + line.index = right_index parse_one_or_more_spaces(line) right = parse_right(line) parse_eol(line) - return Rule(left, name, head, right) + return Rule(left, left_index, name, head, right, right_index) def parse_transformation(line: Line) -> Transformation: @@ -377,6 +380,9 @@ def parse_transformation(line: Line) -> Transformation: elif rule.name == "exact": return ExactTf(rule) elif rule.name == "name": + if len(PurePath(rule.left).parts) > 1: + line.index = rule.left_index + raise RuleParseError(line, "Expected name, not multiple segments") return RenamingPartsTf(ExactTf(rule)) elif rule.name == "re": return RenamingParentsTf(ExactReTf(rule)) From 91200f3684973f40d6409ce38368eceb6e73da0f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 3 Jul 2021 12:07:18 +0200 
Subject: [PATCH 018/224] Fix nondeterministic name deduplication --- PFERD/crawl/crawler.py | 8 +- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 145 +++++++++++++-------- 2 files changed, 93 insertions(+), 60 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index d61783f..d798bc3 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -56,7 +56,7 @@ def noncritical(f: Wrapped) -> Wrapped: return wrapper # type: ignore -AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) +AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) def anoncritical(f: AWrapped) -> AWrapped: @@ -72,14 +72,14 @@ def anoncritical(f: AWrapped) -> AWrapped: Warning: Must only be applied to member functions of the Crawler class! """ - async def wrapper(*args: Any, **kwargs: Any) -> None: + async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]: if not (args and isinstance(args[0], Crawler)): raise RuntimeError("@anoncritical must only applied to Crawler methods") crawler = args[0] try: - await f(*args, **kwargs) + return await f(*args, **kwargs) except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: log.warn(str(e)) crawler.error_free = False @@ -87,6 +87,8 @@ def anoncritical(f: AWrapped) -> AWrapped: crawler.error_free = False raise + return None + return wrapper # type: ignore diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 41c301c..a61eb4e 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -12,7 +12,7 @@ from ...config import Config from ...logging import ProgressBar, log from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param -from ..crawler import CrawlError, CrawlWarning, anoncritical +from ..crawler import CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, 
HttpCrawlerSection from .file_templates import Links from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement @@ -81,17 +81,16 @@ _VIDEO_ELEMENTS: Set[IliasElementType] = set([ IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, ]) -AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[None]]) +AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: def decorator(f: AWrapped) -> AWrapped: - async def wrapper(*args: Any, **kwargs: Any) -> None: + async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]: last_exception: Optional[BaseException] = None for round in range(attempts): try: - await f(*args, **kwargs) - return + return await f(*args, **kwargs) except aiohttp.ContentTypeError: # invalid content type raise CrawlWarning("ILIAS returned an invalid content type") except aiohttp.TooManyRedirects: @@ -230,17 +229,33 @@ instance's greatest bottleneck. # Fill up our task list with the found elements await gather_elements() - tasks = [self._handle_ilias_element(PurePath("."), element) for element in elements] + + tasks: List[Awaitable[None]] = [] + for element in elements: + if handle := await self._handle_ilias_element(PurePath("."), element): + tasks.append(asyncio.create_task(handle)) # And execute them await self.gather(tasks) - async def _handle_ilias_page(self, url: str, parent: IliasPageElement, path: PurePath) -> None: + async def _handle_ilias_page( + self, + url: str, + parent: IliasPageElement, + path: PurePath, + ) -> Optional[Awaitable[None]]: maybe_cl = await self.crawl(path) if not maybe_cl: - return - cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + return None + return self._crawl_ilias_page(url, parent, path, maybe_cl) + async def _crawl_ilias_page( + self, + url: str, + parent: IliasPageElement, + path: PurePath, + cl: CrawlToken, + ) -> None: elements: List[IliasPageElement] = [] 
@_iorepeat(3, "crawling folder") @@ -265,7 +280,11 @@ instance's greatest bottleneck. # Fill up our task list with the found elements await gather_elements() - tasks = [self._handle_ilias_element(cl.path, element) for element in elements] + + tasks: List[Awaitable[None]] = [] + for element in elements: + if handle := await self._handle_ilias_element(cl.path, element): + tasks.append(asyncio.create_task(handle)) # And execute them await self.gather(tasks) @@ -274,7 +293,11 @@ instance's greatest bottleneck. # Shouldn't happen but we also really don't want to let I/O errors bubble up to anoncritical. # If that happens we will be terminated as anoncritical doesn't tream them as non-critical. @_wrap_io_in_warning("handling ilias element") - async def _handle_ilias_element(self, parent_path: PurePath, element: IliasPageElement) -> None: + async def _handle_ilias_element( + self, + parent_path: PurePath, + element: IliasPageElement, + ) -> Optional[Awaitable[None]]: element_path = PurePath(parent_path, element.name) if element.type in _VIDEO_ELEMENTS: @@ -282,35 +305,41 @@ instance's greatest bottleneck. 
if not self._videos: log.explain("Video crawling is disabled") log.explain("Answer: no") - return + return None else: log.explain("Video crawling is enabled") log.explain("Answer: yes") if element.type == IliasElementType.FILE: - await self._download_file(element, element_path) + return await self._handle_file(element, element_path) elif element.type == IliasElementType.FORUM: log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Forums are not supported") log.explain("Answer: No") + return None elif element.type == IliasElementType.TEST: log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Tests contain no relevant files") log.explain("Answer: No") + return None elif element.type == IliasElementType.LINK: - await self._download_link(element, element_path) + return await self._handle_link(element, element_path) elif element.type == IliasElementType.VIDEO: - await self._download_file(element, element_path) + return await self._handle_file(element, element_path) elif element.type == IliasElementType.VIDEO_PLAYER: - await self._download_video(element, element_path) + return await self._handle_video(element, element_path) elif element.type in _DIRECTORY_PAGES: - await self._handle_ilias_page(element.url, element, element_path) + return await self._handle_ilias_page(element.url, element, element_path) else: # This will retry it a few times, failing everytime. It doesn't make any network # requests, so that's fine. raise CrawlWarning(f"Unknown element type: {element.type!r}") - async def _download_link(self, element: IliasPageElement, element_path: PurePath) -> None: + async def _handle_link( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Awaitable[None]]: log.explain_topic(f"Decision: Crawl Link {fmt_path(element_path)}") log.explain(f"Links type is {self._links}") @@ -318,32 +347,30 @@ instance's greatest bottleneck. 
link_extension = self._links.extension() if not link_template_maybe or not link_extension: log.explain("Answer: No") - return + return None else: log.explain("Answer: Yes") - link_template = link_template_maybe element_path = element_path.with_name(element_path.name + link_extension) maybe_dl = await self.download(element_path, mtime=element.mtime) if not maybe_dl: - return - dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + return None - @_iorepeat(3, "resolving link") - async def impl() -> None: - async with dl as (bar, sink): - export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") - real_url = await self._resolve_link_target(export_url) + return self._download_link(element, link_template_maybe, maybe_dl) - content = link_template - content = content.replace("{{link}}", real_url) - content = content.replace("{{name}}", element.name) - content = content.replace("{{description}}", str(element.description)) - content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) - sink.file.write(content.encode("utf-8")) - sink.done() + @_iorepeat(3, "resolving link") + async def _download_link(self, element: IliasPageElement, link_template: str, dl: DownloadToken) -> None: + async with dl as (bar, sink): + export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") + real_url = await self._resolve_link_target(export_url) - await impl() + content = link_template + content = content.replace("{{link}}", real_url) + content = content.replace("{{name}}", element.name) + content = content.replace("{{description}}", str(element.description)) + content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) + sink.file.write(content.encode("utf-8")) + sink.done() async def _resolve_link_target(self, export_url: str) -> str: async with self.session.get(export_url, allow_redirects=False) as resp: @@ -360,39 +387,43 @@ instance's greatest bottleneck. 
raise CrawlError("resolve_link_target failed even after authenticating") - async def _download_video(self, element: IliasPageElement, element_path: PurePath) -> None: + async def _handle_video( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Awaitable[None]]: # Videos will NOT be redownloaded - their content doesn't really change and they are chunky maybe_dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) if not maybe_dl: - return - dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + return None - @_iorepeat(3, "downloading video") - async def impl() -> None: - assert dl # The function is only reached when dl is not None - async with dl as (bar, sink): - page = IliasPage(await self._get_page(element.url), element.url, element) - real_element = page.get_child_elements()[0] + return self._download_video(element, maybe_dl) - log.explain(f"Streaming video from real url {real_element.url}") + @_iorepeat(3, "downloading video") + async def _download_video(self, element: IliasPageElement, dl: DownloadToken) -> None: + async with dl as (bar, sink): + page = IliasPage(await self._get_page(element.url), element.url, element) + real_element = page.get_child_elements()[0] - await self._stream_from_url(real_element.url, sink, bar, is_video=True) + log.explain(f"Streaming video from real url {real_element.url}") - await impl() + await self._stream_from_url(real_element.url, sink, bar, is_video=True) - async def _download_file(self, element: IliasPageElement, element_path: PurePath) -> None: + async def _handle_file( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Awaitable[None]]: maybe_dl = await self.download(element_path, mtime=element.mtime) if not maybe_dl: - return - dl = maybe_dl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 + return None + return self._download_file(element, maybe_dl) - 
@_iorepeat(3, "downloading file") - async def impl() -> None: - assert dl # The function is only reached when dl is not None - async with dl as (bar, sink): - await self._stream_from_url(element.url, sink, bar, is_video=False) - - await impl() + @_iorepeat(3, "downloading file") + async def _download_file(self, element: IliasPageElement, dl: DownloadToken) -> None: + assert dl # The function is only reached when dl is not None + async with dl as (bar, sink): + await self._stream_from_url(element.url, sink, bar, is_video=False) async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar, is_video: bool) -> None: async def try_stream() -> bool: From 89be07d4d3562c75f10539c7a51c171933d3de82 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 3 Jul 2021 17:05:48 +0200 Subject: [PATCH 019/224] Use final crawl path in HTML parsing message --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index a61eb4e..83cac32 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -247,13 +247,12 @@ instance's greatest bottleneck. maybe_cl = await self.crawl(path) if not maybe_cl: return None - return self._crawl_ilias_page(url, parent, path, maybe_cl) + return self._crawl_ilias_page(url, parent, maybe_cl) async def _crawl_ilias_page( self, url: str, parent: IliasPageElement, - path: PurePath, cl: CrawlToken, ) -> None: elements: List[IliasPageElement] = [] @@ -267,7 +266,7 @@ instance's greatest bottleneck. 
while next_stage_url: soup = await self._get_page(next_stage_url) - log.explain_topic(f"Parsing HTML page for {fmt_path(path)}") + log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}") log.explain(f"URL: {next_stage_url}") page = IliasPage(soup, next_stage_url, current_parent) if next_element := page.get_next_stage_element(): From 8ec3f41251cf69a365c9009400e67d539bb4afc4 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 6 Jul 2021 16:13:23 +0200 Subject: [PATCH 020/224] Crawl ilias booking objects as links --- PFERD/crawl/ilias/kit_ilias_html.py | 4 ++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 58 +++++++++++++++++++--- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 41f45e2..247002b 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -22,6 +22,7 @@ class IliasElementType(Enum): FOLDER = "folder" FORUM = "forum" LINK = "link" + BOOKING = "booking" MEETING = "meeting" VIDEO = "video" VIDEO_PLAYER = "video_player" @@ -490,6 +491,9 @@ class IliasPage: if str(img_tag["src"]).endswith("icon_webr.svg"): return IliasElementType.LINK + if str(img_tag["src"]).endswith("icon_book.svg"): + return IliasElementType.BOOKING + if str(img_tag["src"]).endswith("frm.svg"): return IliasElementType.FORUM diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 83cac32..a0e323b 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -323,6 +323,8 @@ instance's greatest bottleneck. 
return None elif element.type == IliasElementType.LINK: return await self._handle_link(element, element_path) + elif element.type == IliasElementType.BOOKING: + return await self._handle_booking(element, element_path) elif element.type == IliasElementType.VIDEO: return await self._handle_file(element, element_path) elif element.type == IliasElementType.VIDEO_PLAYER: @@ -362,14 +364,56 @@ instance's greatest bottleneck. async with dl as (bar, sink): export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML") real_url = await self._resolve_link_target(export_url) + self._write_link_content(link_template, real_url, element.name, element.description, sink) - content = link_template - content = content.replace("{{link}}", real_url) - content = content.replace("{{name}}", element.name) - content = content.replace("{{description}}", str(element.description)) - content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) - sink.file.write(content.encode("utf-8")) - sink.done() + def _write_link_content( + self, + link_template: str, + url: str, + name: str, + description: Optional[str], + sink: FileSink, + ) -> None: + content = link_template + content = content.replace("{{link}}", url) + content = content.replace("{{name}}", name) + content = content.replace("{{description}}", str(description)) + content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay)) + sink.file.write(content.encode("utf-8")) + sink.done() + + async def _handle_booking( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Awaitable[None]]: + log.explain_topic(f"Decision: Crawl Booking Link {fmt_path(element_path)}") + log.explain(f"Links type is {self._links}") + + link_template_maybe = self._links.template() + link_extension = self._links.extension() + if not link_template_maybe or not link_extension: + log.explain("Answer: No") + return None + else: + log.explain("Answer: Yes") + element_path = 
element_path.with_name(element_path.name + link_extension) + + maybe_dl = await self.download(element_path, mtime=element.mtime) + if not maybe_dl: + return None + + return self._download_booking(element, link_template_maybe, maybe_dl) + + @_iorepeat(3, "resolving booking") + async def _download_booking( + self, + element: IliasPageElement, + link_template: str, + dl: DownloadToken, + ) -> None: + async with dl as (bar, sink): + self._write_link_content(link_template, element.url, element.name, element.description, sink) async def _resolve_link_target(self, export_url: str) -> str: async with self.session.get(export_url, allow_redirects=False) as resp: From ee67f9f4725be9f418d66b85bb8a749de8e5d713 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 6 Jul 2021 17:45:12 +0200 Subject: [PATCH 021/224] Sort elements by ILIAS id to ensure deterministic ordering --- PFERD/crawl/ilias/kit_ilias_html.py | 11 +++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 247002b..7e91926 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -38,6 +38,17 @@ class IliasPageElement: mtime: Optional[datetime] = None description: Optional[str] = None + def id(self) -> str: + regexes = [r"eid=(?P[0-9a-z\-]+)", r"file_(?P\d+)", r"ref_id=(?P\d+)"] + + for regex in regexes: + if match := re.search(regex, self.url): + return match.groupdict()["id"] + + # Fall back to URL + log.warn(f"Didn't find identity for {self.name} - {self.url}. Please report this.") + return self.url + class IliasPage: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index a0e323b..cca6987 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -230,6 +230,8 @@ instance's greatest bottleneck. 
# Fill up our task list with the found elements await gather_elements() + elements.sort(key=lambda e: e.id()) + tasks: List[Awaitable[None]] = [] for element in elements: if handle := await self._handle_ilias_element(PurePath("."), element): @@ -280,6 +282,8 @@ instance's greatest bottleneck. # Fill up our task list with the found elements await gather_elements() + elements.sort(key=lambda e: e.id()) + tasks: List[Awaitable[None]] = [] for element in elements: if handle := await self._handle_ilias_element(cl.path, element): From 86f79ff1f137f6f728df08a51b12acb096e00979 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 7 Jul 2021 14:26:20 +0200 Subject: [PATCH 022/224] Update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 427219e..20dd53c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,19 @@ ambiguous situations. ## Unreleased +### Added +- `--skip` command line option +- Support for ILIAS booking objects + +### Changed +- Using multiple path segments on left side of `-name->` now results in an + error. This was already forbidden by the documentation but silently accepted + by PFERD. +- More consistent path printing in some `--explain` messages + +### Fixed +- Nondeterministic name deduplication due to ILIAS reordering elements + ## 3.1.0 - 2021-06-13 If your config file doesn't do weird things with transforms, it should continue From 544d45cbc570080964ab50044301b304343f9a31 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 13 Jul 2021 15:42:11 +0200 Subject: [PATCH 023/224] Catch non-critical exceptions at crawler top level --- CHANGELOG.md | 1 + PFERD/crawl/crawler.py | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20dd53c..181ef99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ ambiguous situations. 
### Fixed - Nondeterministic name deduplication due to ILIAS reordering elements +- More exceptions are handled properly ## 3.1.0 - 2021-06-13 diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index d798bc3..c492ee9 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -320,6 +320,7 @@ class Crawler(ABC): log.explain("Warnings or errors occurred during this run") log.explain("Answer: No") + @anoncritical async def run(self) -> None: """ Start the crawling process. Call this function if you want to use a From 742632ed8d6cebd10c7e28902afba2fccb108712 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 4 Aug 2021 18:27:26 +0000 Subject: [PATCH 024/224] Bump version to 3.2.0 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 181ef99..1ac3a8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.2.0 - 2021-08-04 + ### Added - `--skip` command line option - Support for ILIAS booking objects diff --git a/PFERD/version.py b/PFERD/version.py index 8ce7ae4..b8efadd 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.1.0" +VERSION = "3.2.0" From 66730773977a2602aebd5396efc1c6d8bd7b0ad7 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 21 Oct 2021 12:01:41 +0200 Subject: [PATCH 025/224] Add kit-ipd crawler --- CHANGELOG.md | 1 + CONFIG.md | 7 ++ PFERD/cli/__init__.py | 1 + PFERD/cli/command_kit_ipd.py | 46 +++++++++++ PFERD/crawl/__init__.py | 3 + PFERD/crawl/kit_ipd_crawler.py | 138 +++++++++++++++++++++++++++++++++ 6 files changed, 196 insertions(+) create mode 100644 PFERD/cli/command_kit_ipd.py create mode 100644 PFERD/crawl/kit_ipd_crawler.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ac3a8d..cca4839 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ ambiguous situations. 
### Added - `--skip` command line option - Support for ILIAS booking objects +- A KIT IPD crawler ### Changed - Using multiple path segments on left side of `-name->` now results in an diff --git a/CONFIG.md b/CONFIG.md index 19afbd2..06b9246 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -136,6 +136,13 @@ crawler simulate a slower, network-based crawler. requests. (Default: `0.0`) - `download_speed`: Download speed (in bytes per second) to simulate. (Optional) +### The `kit-ipd` crawler + +This crawler crals a KIT ipd page by url. The root page can be crawled from +outside the KIT network so you will be informed about any new/deleted files, +but downloading files requires you to be within. Adding a show delay between +requests is likely a good idea. + ### The `kit-ilias-web` crawler This crawler crawls the KIT ILIAS instance. diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py index d70ecd9..efa8f00 100644 --- a/PFERD/cli/__init__.py +++ b/PFERD/cli/__init__.py @@ -9,4 +9,5 @@ from . import command_local # noqa: F401 imported but unused from . import command_kit_ilias_web # noqa: F401 imported but unused +from . 
import command_kit_ipd # noqa: F401 imported but unused from .parser import PARSER, ParserLoadError, load_default_section # noqa: F401 imported but unused diff --git a/PFERD/cli/command_kit_ipd.py b/PFERD/cli/command_kit_ipd.py new file mode 100644 index 0000000..480cc9b --- /dev/null +++ b/PFERD/cli/command_kit_ipd.py @@ -0,0 +1,46 @@ +import argparse +import configparser +from pathlib import Path + +from ..logging import log +from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler + +SUBPARSER = SUBPARSERS.add_parser( + "kit-ipd", + parents=[CRAWLER_PARSER], +) + +GROUP = SUBPARSER.add_argument_group( + title="kit ipd crawler arguments", + description="arguments for the 'kit-ipd' crawler", +) +GROUP.add_argument( + "target", + type=str, + metavar="TARGET", + help="url to crawl" +) +GROUP.add_argument( + "output", + type=Path, + metavar="OUTPUT", + help="output directory" +) + + +def load( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + log.explain("Creating config for command 'kit-ipd'") + + parser["crawl:kit-ipd"] = {} + section = parser["crawl:ipd"] + load_crawler(args, section) + + section["type"] = "kit-ipd" + section["target"] = str(args.target) + section["output_dir"] = str(args.output) + + +SUBPARSER.set_defaults(command=load) diff --git a/PFERD/crawl/__init__.py b/PFERD/crawl/__init__.py index 7eb2fb1..1f8bd59 100644 --- a/PFERD/crawl/__init__.py +++ b/PFERD/crawl/__init__.py @@ -5,6 +5,7 @@ from ..auth import Authenticator from ..config import Config from .crawler import Crawler, CrawlError, CrawlerSection # noqa: F401 from .ilias import KitIliasWebCrawler, KitIliasWebCrawlerSection +from .kit_ipd_crawler import KitIpdCrawler, KitIpdCrawlerSection from .local_crawler import LocalCrawler, LocalCrawlerSection CrawlerConstructor = Callable[[ @@ -19,4 +20,6 @@ CRAWLERS: Dict[str, CrawlerConstructor] = { LocalCrawler(n, LocalCrawlerSection(s), c), "kit-ilias-web": lambda n, s, c, a: KitIliasWebCrawler(n, 
KitIliasWebCrawlerSection(s), c, a), + "kit-ipd": lambda n, s, c, a: + KitIpdCrawler(n, KitIpdCrawlerSection(s), c), } diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py new file mode 100644 index 0000000..4d4addd --- /dev/null +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -0,0 +1,138 @@ +import os +from dataclasses import dataclass +from pathlib import PurePath +from typing import List, Set, Union +from urllib.parse import urljoin + +from bs4 import BeautifulSoup, Tag + +from ..config import Config +from ..logging import ProgressBar, log +from ..output_dir import FileSink +from ..utils import soupify +from .crawler import CrawlError +from .http_crawler import HttpCrawler, HttpCrawlerSection + + +class KitIpdCrawlerSection(HttpCrawlerSection): + def target(self) -> str: + target = self.s.get("target") + if not target: + self.missing_value("target") + + if not target.startswith("https://"): + self.invalid_value("target", target, "Should be a URL") + + return target + + +@dataclass +class KitIpdFile: + name: str + url: str + + +@dataclass +class KitIpdFolder: + name: str + files: List[KitIpdFile] + + +class KitIpdCrawler(HttpCrawler): + + def __init__( + self, + name: str, + section: KitIpdCrawlerSection, + config: Config, + ): + super().__init__(name, section, config) + self._url = section.target() + + async def _run(self) -> None: + maybe_cl = await self.crawl(PurePath(".")) + if not maybe_cl: + return + + folders: List[KitIpdFolder] = [] + + async with maybe_cl: + folder_tags = await self._fetch_folder_tags() + folders = [self._extract_folder(tag) for tag in folder_tags] + + tasks = [self._crawl_folder(folder) for folder in folders] + + await self.gather(tasks) + + async def _crawl_folder(self, folder: KitIpdFolder) -> None: + path = PurePath(folder.name) + if not await self.crawl(path): + return + + tasks = [self._download_file(path, file) for file in folder.files] + + await self.gather(tasks) + + async def _download_file(self, parent: 
PurePath, file: KitIpdFile) -> None: + element_path = parent / file.name + maybe_dl = await self.download(element_path) + if not maybe_dl: + return + + async with maybe_dl as (bar, sink): + await self._stream_from_url(file.url, sink, bar) + + async def _fetch_folder_tags(self) -> Set[Tag]: + page = await self.get_page() + elements: List[Tag] = self._find_file_links(page) + folder_tags: Set[Tag] = set() + + for element in elements: + enclosing_data: Tag = element.findParent(name="td") + label: Tag = enclosing_data.findPreviousSibling(name="td") + folder_tags.add(label) + + return folder_tags + + def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: + name = folder_tag.getText().strip() + files: List[KitIpdFile] = [] + + container: Tag = folder_tag.findNextSibling(name="td") + for link in self._find_file_links(container): + files.append(self._extract_file(link)) + + log.explain_topic(f"Found folder {name!r}") + for file in files: + log.explain(f"Found file {file.name!r}") + + return KitIpdFolder(name, files) + + def _extract_file(self, link: Tag) -> KitIpdFile: + name = link.getText().strip() + url = self._abs_url_from_link(link) + _, extension = os.path.splitext(url) + return KitIpdFile(name + extension, url) + + def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]: + return tag.findAll(name="a", attrs={"href": lambda x: x and "intern" in x}) + + def _abs_url_from_link(self, link_tag: Tag) -> str: + return urljoin(self._url, link_tag.get("href")) + + async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: + async with self.session.get(url, allow_redirects=False) as resp: + if resp.status == 403: + raise CrawlError("Received a 403. 
Are you within the KIT network/VPN?") + if resp.content_length: + bar.set_total(resp.content_length) + + async for data in resp.content.iter_chunked(1024): + sink.file.write(data) + bar.advance(len(data)) + + sink.done() + + async def get_page(self) -> BeautifulSoup: + async with self.session.get(self._url) as request: + return soupify(await request.read()) From fee12b3d9e8469d37b972f28d84a7d44538744bc Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 25 Oct 2021 17:44:12 +0000 Subject: [PATCH 026/224] Fix changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cca4839..522d96d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,12 +22,14 @@ ambiguous situations. ## Unreleased +### Added +- A KIT IPD crawler + ## 3.2.0 - 2021-08-04 ### Added - `--skip` command line option - Support for ILIAS booking objects -- A KIT IPD crawler ### Changed - Using multiple path segments on left side of `-name->` now results in an From 55ea304ff338f249914b95938675a4e9b07d0875 Mon Sep 17 00:00:00 2001 From: lukasprobst Date: Mon, 25 Oct 2021 22:32:54 +0200 Subject: [PATCH 027/224] Disable interpolation of ConfigParser --- CHANGELOG.md | 3 +++ CONFIG.md | 6 +++--- LICENSE | 2 +- PFERD/__main__.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 522d96d..a90c978 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ ambiguous situations. ### Added - A KIT IPD crawler +### Removed +- [Interpolation](https://docs.python.org/3/library/configparser.html#interpolation-of-values) in config file + ## 3.2.0 - 2021-08-04 ### Added diff --git a/CONFIG.md b/CONFIG.md index 06b9246..4d2ec33 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -4,11 +4,11 @@ A config file consists of sections. A section begins with a `[section]` header, which is followed by a list of `key = value` pairs. Comments must be on their own line and start with `#`. 
Multiline values must be indented beyond their key. Boolean values can be `yes` or `no`. For more details and some examples on the -format, see the [configparser documentation][1] ([basic interpolation][2] is -enabled). +format, see the [configparser documentation][1] ([interpolation][2] is +disabled). [1]: "Supported INI File Structure" -[2]: "BasicInterpolation" +[2]: "Interpolation of values" ## The `DEFAULT` section diff --git a/LICENSE b/LICENSE index 01f15f5..c096c4a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2019-2020 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim +Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim, thelukasprobst Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/__main__.py b/PFERD/__main__.py index b665feb..bdf5b34 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -15,7 +15,7 @@ from .transformer import RuleParseError def load_config_parser(args: argparse.Namespace) -> configparser.ConfigParser: log.explain_topic("Loading config") - parser = configparser.ConfigParser() + parser = configparser.ConfigParser(interpolation=None) if args.command is None: log.explain("No CLI command specified, loading config from file") From ef7d5ea2d3282e71cf0ba82698e409483cc1ad0a Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 30 Oct 2021 18:09:05 +0200 Subject: [PATCH 028/224] Allow storing crawler-specific data in reports --- PFERD/report.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/PFERD/report.py b/PFERD/report.py index 919bb35..99a4661 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -1,6 +1,6 @@ import json from pathlib import Path, PurePath -from typing import Any, Dict, List, Set +from typing import Any, Dict, List, Optional, Set class ReportLoadError(Exception): @@ 
-67,6 +67,7 @@ class Report: self.deleted_files: Set[PurePath] = set() # Files that should have been deleted by the cleanup but weren't self.not_deleted_files: Set[PurePath] = set() + self.custom: Dict[str, Any] = dict() @staticmethod def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]: @@ -81,6 +82,15 @@ class Report: return result + @staticmethod + def _get_str_dictionary(data: Dict[str, Any], key: str) -> Dict[str, Any]: + result: Dict[str, Any] = data.get(key, {}) + + if not isinstance(result, dict): + raise ReportLoadError(f"Incorrect format: {key!r} is not a dictionary") + + return result + @classmethod def load(cls, path: Path) -> "Report": """ @@ -108,6 +118,7 @@ class Report: self.delete_file(PurePath(elem)) for elem in self._get_list_of_strs(data, "not_deleted"): self.not_delete_file(PurePath(elem)) + self.custom = self._get_str_dictionary(data, "custom") return self @@ -124,6 +135,7 @@ class Report: "changed": [str(path) for path in sorted(self.changed_files)], "deleted": [str(path) for path in sorted(self.deleted_files)], "not_deleted": [str(path) for path in sorted(self.not_deleted_files)], + "custom": self.custom } with open(path, "w") as f: @@ -190,3 +202,15 @@ class Report: """ self.not_deleted_files.add(path) + + def add_custom_value(self, key: str, value: Any) -> None: + """ + Adds a custom value under the passed key, overwriting any existing + """ + self.custom[key] = value + + def get_custom_value(self, key: str) -> Optional[Any]: + """ + Retrieves a custom value for the given key. 
+ """ + return self.custom.get(key) From f9a3f9b9f2702796f64d11d5d649261ea76a908d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 30 Oct 2021 18:12:29 +0200 Subject: [PATCH 029/224] Handle multi-stream videos --- PFERD/crawl/ilias/kit_ilias_html.py | 18 ++++- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 86 +++++++++++++++++++--- 2 files changed, 92 insertions(+), 12 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 7e91926..78ae084 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -133,9 +133,21 @@ class IliasPage: # parse it json_object = json.loads(json_str) - # and fetch the video url! - video_url = json_object["streams"][0]["sources"]["mp4"][0]["src"] - return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + streams = [stream for stream in json_object["streams"] if stream["type"] == "video"] + + # and just fetch the lone video url! + if len(streams) == 1: + video_url = streams[0]["sources"]["mp4"][0]["src"] + return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + + log.explain(f"Found multiple videos for stream at {self._source_name}") + items = [] + for stream in sorted(streams, key=lambda stream: stream["content"]): + full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4" + video_url = stream["sources"]["mp4"][0]["src"] + items.append(IliasPageElement(IliasElementType.VIDEO, video_url, full_name)) + + return items def _find_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index cca6987..f483754 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -1,7 +1,7 @@ import asyncio import re from pathlib import PurePath -from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, 
TypeVar, Union +from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union, cast import aiohttp from aiohttp import hdrs @@ -439,22 +439,90 @@ instance's greatest bottleneck. element: IliasPageElement, element_path: PurePath, ) -> Optional[Awaitable[None]]: - # Videos will NOT be redownloaded - their content doesn't really change and they are chunky - maybe_dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.NEVER) - if not maybe_dl: + # Copy old mapping as it is likely still relevant + if self.prev_report: + self.report.add_custom_value( + str(element_path), + self.prev_report.get_custom_value(str(element_path)) + ) + + # A video might contain other videos, so let's "crawl" the video first + # to ensure rate limits apply. This must be a download as *this token* + # is re-used if the video consists of a single stream. In that case the + # file name is used and *not* the stream name the ilias html parser reported + # to ensure backwards compatibility. + maybe_dl = await self.download(element_path, redownload=Redownload.ALWAYS) + + # If we do not want to crawl it (user filter) or we have every file + # from the cached mapping already, we can ignore this and bail + if not maybe_dl or self._all_videos_locally_present(element_path): + # Mark all existing cideos as known so they do not get deleted + # during dleanup. We "downloaded" them, just without actually making + # a network request as we assumed they did not change. 
+ for video in self._previous_contained_videos(element_path): + await self.download(video) + return None - return self._download_video(element, maybe_dl) + return self._download_video(element_path, element, maybe_dl) + + def _previous_contained_videos(self, video_path: PurePath) -> List[PurePath]: + if not self.prev_report: + return [] + custom_value = self.prev_report.get_custom_value(str(video_path)) + if not custom_value: + return [] + names = cast(List[str], custom_value) + folder = video_path.parent + return [PurePath(folder, name) for name in names] + + def _all_videos_locally_present(self, video_path: PurePath) -> bool: + if contained_videos := self._previous_contained_videos(video_path): + log.explain_topic(f"Checking local cache for video {video_path.name}") + all_found_locally = True + for video in contained_videos: + all_found_locally = all_found_locally and self._output_dir.resolve(video).exists() + if all_found_locally: + log.explain("Found all videos locally, skipping enumeration request") + return True + log.explain("Missing at least one video, continuing with requests!") + return False @_iorepeat(3, "downloading video") - async def _download_video(self, element: IliasPageElement, dl: DownloadToken) -> None: + async def _download_video( + self, + original_path: PurePath, + element: IliasPageElement, + dl: DownloadToken + ) -> None: + stream_elements: List[IliasPageElement] = [] async with dl as (bar, sink): page = IliasPage(await self._get_page(element.url), element.url, element) - real_element = page.get_child_elements()[0] + stream_elements = page.get_child_elements() - log.explain(f"Streaming video from real url {real_element.url}") + if len(stream_elements) > 1: + log.explain(f"Found multiple video streams for {element.name}") + else: + log.explain(f"Using single video mode for {element.name}") + stream_element = stream_elements[0] + await self._stream_from_url(stream_element.url, sink, bar, is_video=True) + 
self.report.add_custom_value(str(original_path), [original_path.name]) + return - await self._stream_from_url(real_element.url, sink, bar, is_video=True) + contained_video_paths: List[str] = [] + + for stream_element in stream_elements: + contained_video_paths.append(stream_element.name) + video_path = original_path.parent / stream_element.name + + maybe_dl = await self.download(video_path, mtime=element.mtime, redownload=Redownload.NEVER) + if not maybe_dl: + continue + async with maybe_dl as (bar, sink): + log.explain(f"Streaming video from real url {stream_element.url}") + await self._stream_from_url(stream_element.url, sink, bar, is_video=True) + + self.report.add_custom_value(str(original_path), contained_video_paths) async def _handle_file( self, From e42ab83d32ce852eb26e1a21982399e2988e769a Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 25 Oct 2021 11:07:25 +0200 Subject: [PATCH 030/224] Add support for ILIAS cards --- PFERD/crawl/ilias/kit_ilias_html.py | 94 ++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 78ae084..d8c347d 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -368,6 +368,8 @@ class IliasPage: log.explain(f"Found {element_name!r}") result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) + result += self._find_cards() + return result def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]: @@ -450,6 +452,90 @@ class IliasPage: log.explain(f"Found file {full_path!r}") return IliasPageElement(IliasElementType.FILE, url, full_path, modification_date) + def _find_cards(self) -> List[IliasPageElement]: + result: List[IliasPageElement] = [] + + card_titles: List[Tag] = self._soup.select(".card-title a") + + for title in card_titles: + url = self._abs_url_from_link(title) + name = _sanitize_path_name(title.getText().strip()) + type = 
self._find_type_from_card(title) + + if not type: + _unexpected_html_warning() + log.warn_contd(f"Could not extract type for {title}") + continue + + result.append(IliasPageElement(type, url, name)) + + card_button_tiles: List[Tag] = self._soup.select(".card-title button") + + for button in card_button_tiles: + regex = re.compile(button["id"] + r".*window.open\(['\"](.+?)['\"]") + res = regex.search(str(self._soup)) + if not res: + _unexpected_html_warning() + log.warn_contd(f"Could not find click handler target for {button}") + continue + url = self._abs_url_from_relative(res.group(1)) + name = _sanitize_path_name(button.getText().strip()) + type = self._find_type_from_card(button) + caption_parent = button.findParent( + "div", + attrs={"class": lambda x: x and "caption" in x}, + ) + description = caption_parent.find_next_sibling("div").getText().strip() + + if not type: + _unexpected_html_warning() + log.warn_contd(f"Could not extract type for {button}") + continue + + result.append(IliasPageElement(type, url, name, description=description)) + + return result + + def _find_type_from_card(self, card_title: Tag) -> Optional[IliasElementType]: + def is_card_root(element: Tag) -> bool: + return "il-card" in element["class"] and "thumbnail" in element["class"] + + card_root: Optional[Tag] = None + + # We look for the card root + for parent in card_title.parents: + if is_card_root(parent): + card_root = parent + break + + if card_root is None: + _unexpected_html_warning() + log.warn_contd(f"Tried to figure out element type, but did not find an icon for {card_title}") + return None + + icon: Tag = card_root.select_one(".il-card-repository-head .icon") + + if "opencast" in icon["class"]: + return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED + if "exc" in icon["class"]: + return IliasElementType.EXERCISE + if "webr" in icon["class"]: + return IliasElementType.LINK + if "book" in icon["class"]: + return IliasElementType.BOOKING + if "frm" in icon["class"]: + return 
IliasElementType.FORUM + if "sess" in icon["class"]: + return IliasElementType.MEETING + if "tst" in icon["class"]: + return IliasElementType.TEST + if "fold" in icon["class"]: + return IliasElementType.FOLDER + + _unexpected_html_warning() + log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") + return None + @staticmethod def _find_type_from_link( element_name: str, @@ -550,7 +636,13 @@ class IliasPage: """ Create an absolute url from an tag. """ - return urljoin(self._page_url, link_tag.get("href")) + return self._abs_url_from_relative(link_tag.get("href")) + + def _abs_url_from_relative(self, relative_url: str) -> str: + """ + Create an absolute url from a relative URL. + """ + return urljoin(self._page_url, relative_url) def _unexpected_html_warning() -> None: From ad3f4955f72a6bfbdcbaaae24b821f078e6e44d5 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 30 Oct 2021 18:14:39 +0200 Subject: [PATCH 031/224] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a90c978..faa2507 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ ambiguous situations. ### Added - A KIT IPD crawler +- Support for ILIAS cards +- Support for multi-stream videos ### Removed - [Interpolation](https://docs.python.org/3/library/configparser.html#interpolation-of-values) in config file From d6f38a61e16fa95d8a2365abc1cfd70f35ee0289 Mon Sep 17 00:00:00 2001 From: Toorero <22551563+Toorero@users.noreply.github.com> Date: Mon, 25 Oct 2021 21:34:51 +0200 Subject: [PATCH 032/224] Fixed minor spelling mistakes --- CONFIG.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 4d2ec33..8ccaa50 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -36,7 +36,7 @@ Sections whose names start with `crawl:` are used to configure crawlers. The rest of the section name specifies the name of the crawler. 
A crawler synchronizes a remote resource to a local directory. There are -different types of crawlers for different kinds of resources, e. g. ILIAS +different types of crawlers for different kinds of resources, e.g. ILIAS courses or lecture websites. Each crawl section represents an instance of a specific type of crawler. The @@ -53,7 +53,7 @@ common to all crawlers: crawler can still be executed manually using the `--crawler` or `-C` flags. (Default: `no`) - `output_dir`: The directory the crawler synchronizes files to. A crawler will - never place any files outside of this directory. (Default: the crawler's name) + never place any files outside this directory. (Default: the crawler's name) - `redownload`: When to download a file that is already present locally. (Default: `never-smart`) - `never`: If a file is present locally, it is not downloaded again. @@ -138,7 +138,7 @@ crawler simulate a slower, network-based crawler. ### The `kit-ipd` crawler -This crawler crals a KIT ipd page by url. The root page can be crawled from +This crawler crawls a KIT ipd page by url. The root page can be crawled from outside the KIT network so you will be informed about any new/deleted files, but downloading files requires you to be within. Adding a show delay between requests is likely a good idea. @@ -312,11 +312,11 @@ matches `SOURCE`, the output path is created using `TARGET` as template. `SOURCE` is automatically anchored. `TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can -be referred to as `{g}` (e. g. `{g3}`). `{g0}` refers to the original path. +be referred to as `{g}` (e.g. `{g3}`). `{g0}` refers to the original path. If capturing group *n*'s contents are a valid integer, the integer value is -available as `{i}` (e. g. `{i3}`). If capturing group *n*'s contents are a -valid float, the float value is available as `{f}` (e. g. `{f3}`). If a -capturing group is not present (e. g. when matching the string `cd` with the +available as `{i}` (e.g. 
`{i3}`). If capturing group *n*'s contents are a +valid float, the float value is available as `{f}` (e.g. `{f3}`). If a +capturing group is not present (e.g. when matching the string `cd` with the regex `(ab)?cd`), the corresponding variables are not defined. Python's format string syntax has rich options for formatting its arguments. For From 6b2a65757373193a5ecb8d2263ae7d758178014d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julius=20R=C3=BCberg?= <22551563+Toorero@users.noreply.github.com> Date: Mon, 1 Nov 2021 10:09:50 +0100 Subject: [PATCH 033/224] Fix IPD crawler for different subpages (#42) This patch reworks the IPD crawler to support subpages which do not use "/intern" for links and fetches the folder names from table headings. --- PFERD/crawl/kit_ipd_crawler.py | 50 ++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 4d4addd..1ed5ffe 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -1,7 +1,9 @@ import os +import re from dataclasses import dataclass from pathlib import PurePath -from typing import List, Set, Union +from re import Pattern +from typing import List, Set, Union, AnyStr, Optional from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag @@ -25,6 +27,10 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target + def link_regex(self) -> Pattern[AnyStr]: + regex = self.s.get("link_regex", "^.*/[^/]*\.(?:pdf|zip|c|java)$") + return re.compile(regex) + @dataclass class KitIpdFile: @@ -48,6 +54,7 @@ class KitIpdCrawler(HttpCrawler): ): super().__init__(name, section, config) self._url = section.target() + self._file_regex = section.link_regex() async def _run(self) -> None: maybe_cl = await self.crawl(PurePath(".")) @@ -88,19 +95,28 @@ class KitIpdCrawler(HttpCrawler): folder_tags: Set[Tag] = set() for element in elements: - enclosing_data: Tag = element.findParent(name="td") - label: 
Tag = enclosing_data.findPreviousSibling(name="td") - folder_tags.add(label) + folder_label = self._fetch_folder_label(element) + if folder_label is None: + folder_tags.add(page) + else: + folder_tags.add(folder_label) return folder_tags def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: - name = folder_tag.getText().strip() files: List[KitIpdFile] = [] + # if files have found outside a regular table + if not folder_tag.name.startswith("h"): + name = "." + root_links = filter(lambda f: self._fetch_folder_label(f) is None, self._find_file_links(folder_tag)) + for link in root_links: + files.append(self._extract_file(link)) - container: Tag = folder_tag.findNextSibling(name="td") - for link in self._find_file_links(container): - files.append(self._extract_file(link)) + else: + name = folder_tag.getText().strip() + container: Tag = folder_tag.findNextSibling(name="table") + for link in self._find_file_links(container): + files.append(self._extract_file(link)) log.explain_topic(f"Found folder {name!r}") for file in files: @@ -108,14 +124,24 @@ class KitIpdCrawler(HttpCrawler): return KitIpdFolder(name, files) + @staticmethod + def _fetch_folder_label(file_link: Tag) -> Optional[Tag]: + enclosing_table: Tag = file_link.findParent(name="table") + if enclosing_table is None: + return None + label: Tag = enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) + if label is None: + return None + else: + return label + def _extract_file(self, link: Tag) -> KitIpdFile: - name = link.getText().strip() url = self._abs_url_from_link(link) - _, extension = os.path.splitext(url) - return KitIpdFile(name + extension, url) + name = os.path.basename(url) + return KitIpdFile(name, url) def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]: - return tag.findAll(name="a", attrs={"href": lambda x: x and "intern" in x}) + return tag.findAll(name="a", attrs={"href": self._file_regex}) def _abs_url_from_link(self, link_tag: Tag) -> str: return 
urljoin(self._url, link_tag.get("href")) From 88afe64a928fce7108264f386298edbbe60117f5 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 1 Nov 2021 10:43:13 +0100 Subject: [PATCH 034/224] Refactor IPD crawler a bit --- PFERD/cli/command_kit_ipd.py | 2 +- PFERD/crawl/kit_ipd_crawler.py | 75 +++++++++++++++++----------------- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/PFERD/cli/command_kit_ipd.py b/PFERD/cli/command_kit_ipd.py index 480cc9b..c4c593f 100644 --- a/PFERD/cli/command_kit_ipd.py +++ b/PFERD/cli/command_kit_ipd.py @@ -35,7 +35,7 @@ def load( log.explain("Creating config for command 'kit-ipd'") parser["crawl:kit-ipd"] = {} - section = parser["crawl:ipd"] + section = parser["crawl:kit-ipd"] load_crawler(args, section) section["type"] = "kit-ipd" diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 1ed5ffe..76145b4 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from pathlib import PurePath from re import Pattern -from typing import List, Set, Union, AnyStr, Optional +from typing import Awaitable, List, Optional, Set, Union from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag @@ -27,12 +27,12 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target - def link_regex(self) -> Pattern[AnyStr]: - regex = self.s.get("link_regex", "^.*/[^/]*\.(?:pdf|zip|c|java)$") + def link_regex(self) -> Pattern[str]: + regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|java)$") return re.compile(regex) -@dataclass +@dataclass(unsafe_hash=True) class KitIpdFile: name: str url: str @@ -43,6 +43,14 @@ class KitIpdFolder: name: str files: List[KitIpdFile] + def explain(self) -> None: + log.explain_topic(f"Folder {self.name!r}") + for file in self.files: + log.explain(f"File {file.name!r}") + + def __hash__(self) -> int: + return self.name.__hash__() + class KitIpdCrawler(HttpCrawler): @@ -61,13 +69,15 @@ 
class KitIpdCrawler(HttpCrawler): if not maybe_cl: return - folders: List[KitIpdFolder] = [] + tasks: List[Awaitable[None]] = [] async with maybe_cl: - folder_tags = await self._fetch_folder_tags() - folders = [self._extract_folder(tag) for tag in folder_tags] - - tasks = [self._crawl_folder(folder) for folder in folders] + for item in await self._fetch_items(): + if isinstance(item, KitIpdFolder): + tasks.append(self._crawl_folder(item)) + else: + # Orphan files are placed in the root folder + tasks.append(self._download_file(PurePath("."), item)) await self.gather(tasks) @@ -89,51 +99,42 @@ class KitIpdCrawler(HttpCrawler): async with maybe_dl as (bar, sink): await self._stream_from_url(file.url, sink, bar) - async def _fetch_folder_tags(self) -> Set[Tag]: + async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]: page = await self.get_page() elements: List[Tag] = self._find_file_links(page) - folder_tags: Set[Tag] = set() + items: Set[Union[KitIpdFile, KitIpdFolder]] = set() for element in elements: - folder_label = self._fetch_folder_label(element) - if folder_label is None: - folder_tags.add(page) + folder_label = self._find_folder_label(element) + if folder_label: + folder = self._extract_folder(folder_label) + if folder not in items: + items.add(folder) + folder.explain() else: - folder_tags.add(folder_label) + file = self._extract_file(element) + items.add(file) + log.explain_topic(f"Orphan file {file.name!r}") + log.explain("Attributing it to root folder") - return folder_tags + return items def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: files: List[KitIpdFile] = [] - # if files have found outside a regular table - if not folder_tag.name.startswith("h"): - name = "." 
- root_links = filter(lambda f: self._fetch_folder_label(f) is None, self._find_file_links(folder_tag)) - for link in root_links: - files.append(self._extract_file(link)) + name = folder_tag.getText().strip() - else: - name = folder_tag.getText().strip() - container: Tag = folder_tag.findNextSibling(name="table") - for link in self._find_file_links(container): - files.append(self._extract_file(link)) - - log.explain_topic(f"Found folder {name!r}") - for file in files: - log.explain(f"Found file {file.name!r}") + container: Tag = folder_tag.findNextSibling(name="table") + for link in self._find_file_links(container): + files.append(self._extract_file(link)) return KitIpdFolder(name, files) @staticmethod - def _fetch_folder_label(file_link: Tag) -> Optional[Tag]: + def _find_folder_label(file_link: Tag) -> Optional[Tag]: enclosing_table: Tag = file_link.findParent(name="table") if enclosing_table is None: return None - label: Tag = enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) - if label is None: - return None - else: - return label + return enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) def _extract_file(self, link: Tag) -> KitIpdFile: url = self._abs_url_from_link(link) From 13b8c3d9c6c59ab2714e2670506d89c5a2cb6eb6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 2 Nov 2021 09:30:46 +0100 Subject: [PATCH 035/224] Add regex option to config and CLI parser --- CONFIG.md | 7 ++++++- LICENSE | 3 ++- PFERD/cli/command_kit_ipd.py | 8 ++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 8ccaa50..569780d 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -138,11 +138,16 @@ crawler simulate a slower, network-based crawler. ### The `kit-ipd` crawler -This crawler crawls a KIT ipd page by url. The root page can be crawled from +This crawler crawls a KIT-IPD page by url. 
The root page can be crawled from outside the KIT network so you will be informed about any new/deleted files, but downloading files requires you to be within. Adding a show delay between requests is likely a good idea. +- `target`: URL to a KIT-IPD page +- `link_regex`: A regex that is matched against the `href` part of links. If it + matches, the given link is downloaded as a file. This is used to extract + files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|java)$`) + ### The `kit-ilias-web` crawler This crawler crawls the KIT ILIAS instance. diff --git a/LICENSE b/LICENSE index c096c4a..fe2293f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ -Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim, thelukasprobst +Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, + TheChristophe, Scriptim, thelukasprobst, Toorero Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/cli/command_kit_ipd.py b/PFERD/cli/command_kit_ipd.py index c4c593f..b53e67e 100644 --- a/PFERD/cli/command_kit_ipd.py +++ b/PFERD/cli/command_kit_ipd.py @@ -14,6 +14,12 @@ GROUP = SUBPARSER.add_argument_group( title="kit ipd crawler arguments", description="arguments for the 'kit-ipd' crawler", ) +GROUP.add_argument( + "--link-regex", + type=str, + metavar="REGEX", + help="href-matching regex to identify downloadable files" +) GROUP.add_argument( "target", type=str, @@ -41,6 +47,8 @@ def load( section["type"] = "kit-ipd" section["target"] = str(args.target) section["output_dir"] = str(args.output) + if args.link_regex: + section["link_regex"] = str(args.link_regex) SUBPARSER.set_defaults(command=load) From 6289938d7c772660a5d497ce456168186eb8a6fb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 6 Nov 2021 12:09:51 +0100 Subject: [PATCH 036/224] Do not stop crawling files when encountering a 
CrawlWarning --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index f483754..c3e51ef 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -84,7 +84,7 @@ _VIDEO_ELEMENTS: Set[IliasElementType] = set([ AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) -def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: +def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]: def decorator(f: AWrapped) -> AWrapped: async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]: last_exception: Optional[BaseException] = None @@ -105,7 +105,10 @@ def _iorepeat(attempts: int, name: str) -> Callable[[AWrapped], AWrapped]: if last_exception: message = f"Error in I/O Operation: {last_exception}" - raise CrawlWarning(message) from last_exception + if failure_is_error: + raise CrawlError(message) from last_exception + else: + raise CrawlWarning(message) from last_exception raise CrawlError("Impossible return in ilias _iorepeat") return wrapper # type: ignore @@ -251,6 +254,7 @@ instance's greatest bottleneck. return None return self._crawl_ilias_page(url, parent, maybe_cl) + @anoncritical async def _crawl_ilias_page( self, url: str, @@ -292,10 +296,12 @@ instance's greatest bottleneck. # And execute them await self.gather(tasks) + # These decorators only apply *to this method* and *NOT* to the returned + # awaitables! + # This method does not await the handlers but returns them instead. + # This ensures one level is handled at a time and name deduplication + # works correctly. @anoncritical - # Shouldn't happen but we also really don't want to let I/O errors bubble up to anoncritical. 
- # If that happens we will be terminated as anoncritical doesn't tream them as non-critical. - @_wrap_io_in_warning("handling ilias element") async def _handle_ilias_element( self, parent_path: PurePath, @@ -363,6 +369,7 @@ instance's greatest bottleneck. return self._download_link(element, link_template_maybe, maybe_dl) + @anoncritical @_iorepeat(3, "resolving link") async def _download_link(self, element: IliasPageElement, link_template: str, dl: DownloadToken) -> None: async with dl as (bar, sink): @@ -409,6 +416,7 @@ instance's greatest bottleneck. return self._download_booking(element, link_template_maybe, maybe_dl) + @anoncritical @_iorepeat(3, "resolving booking") async def _download_booking( self, @@ -488,6 +496,7 @@ instance's greatest bottleneck. log.explain("Missing at least one video, continuing with requests!") return False + @anoncritical @_iorepeat(3, "downloading video") async def _download_video( self, @@ -534,6 +543,7 @@ instance's greatest bottleneck. return None return self._download_file(element, maybe_dl) + @anoncritical @_iorepeat(3, "downloading file") async def _download_file(self, element: IliasPageElement, dl: DownloadToken) -> None: assert dl # The function is only reached when dl is not None @@ -589,7 +599,7 @@ instance's greatest bottleneck. # We repeat this as the login method in shibboleth doesn't handle I/O errors. # Shibboleth is quite reliable as well, the repeat is likely not critical here. 
- @_iorepeat(3, "Login") + @_iorepeat(3, "Login", failure_is_error=True) async def _authenticate(self) -> None: await self._shibboleth_login.login(self.session) From 90cb6e989b492bbfe2f242c77aad616b86637052 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 6 Nov 2021 23:20:24 +0100 Subject: [PATCH 037/224] Do not download single videos if cache does not exist --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c3e51ef..c6115f4 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -514,7 +514,12 @@ instance's greatest bottleneck. else: log.explain(f"Using single video mode for {element.name}") stream_element = stream_elements[0] - await self._stream_from_url(stream_element.url, sink, bar, is_video=True) + + # We do not have a local cache yet + if self._output_dir.resolve(original_path).exists(): + log.explain(f"Video for {element.name} existed locally") + else: + await self._stream_from_url(stream_element.url, sink, bar, is_video=True) self.report.add_custom_value(str(original_path), [original_path.name]) return From a82a0b19c2193c6817ae07361889de8fd392868e Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 7 Nov 2021 21:40:22 +0100 Subject: [PATCH 038/224] Collect crawler warnings/errors and include them in the report --- PFERD/crawl/crawler.py | 8 ++++++-- PFERD/pferd.py | 8 ++++++++ PFERD/report.py | 24 +++++++++++++++++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index c492ee9..53f43e9 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -47,10 +47,12 @@ def noncritical(f: Wrapped) -> Wrapped: try: f(*args, **kwargs) except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: + crawler.report.add_warning(str(e)) log.warn(str(e)) 
crawler.error_free = False - except: # noqa: E722 do not use bare 'except' + except Exception as e: crawler.error_free = False + crawler.report.add_error(str(e)) raise return wrapper # type: ignore @@ -83,8 +85,10 @@ def anoncritical(f: AWrapped) -> AWrapped: except (CrawlWarning, OutputDirError, MarkDuplicateError, MarkConflictError) as e: log.warn(str(e)) crawler.error_free = False - except: # noqa: E722 do not use bare 'except' + crawler.report.add_warning(str(e)) + except Exception as e: crawler.error_free = False + crawler.report.add_error(str(e)) raise return None diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 726ed45..079053b 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -182,5 +182,13 @@ class Pferd: something_changed = True log.report(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") + for warning in crawler.report.encountered_warnings: + something_changed = True + log.report(f" [bold bright_red]Warning[/] {warning}") + + for error in crawler.report.encountered_errors: + something_changed = True + log.report(f" [bold bright_red]Error[/] {error}") + if not something_changed: log.report(" Nothing changed") diff --git a/PFERD/report.py b/PFERD/report.py index 99a4661..0e0c789 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -67,8 +67,14 @@ class Report: self.deleted_files: Set[PurePath] = set() # Files that should have been deleted by the cleanup but weren't self.not_deleted_files: Set[PurePath] = set() + + # Custom crawler-specific data self.custom: Dict[str, Any] = dict() + # Encountered errors and warnings + self.encountered_warnings: List[str] = [] + self.encountered_errors: List[str] = [] + @staticmethod def _get_list_of_strs(data: Dict[str, Any], key: str) -> List[str]: result: Any = data.get(key, []) @@ -119,6 +125,8 @@ class Report: for elem in self._get_list_of_strs(data, "not_deleted"): self.not_delete_file(PurePath(elem)) self.custom = self._get_str_dictionary(data, "custom") + self.encountered_errors = 
self._get_list_of_strs(data, "encountered_errors") + self.encountered_warnings = self._get_list_of_strs(data, "encountered_warnings") return self @@ -135,7 +143,9 @@ class Report: "changed": [str(path) for path in sorted(self.changed_files)], "deleted": [str(path) for path in sorted(self.deleted_files)], "not_deleted": [str(path) for path in sorted(self.not_deleted_files)], - "custom": self.custom + "custom": self.custom, + "encountered_warnings": self.encountered_warnings, + "encountered_errors": self.encountered_errors, } with open(path, "w") as f: @@ -214,3 +224,15 @@ class Report: Retrieves a custom value for the given key. """ return self.custom.get(key) + + def add_error(self, error: str) -> None: + """ + Adds an error to this report's error list. + """ + self.encountered_errors.append(error) + + def add_warning(self, warning: str) -> None: + """ + Adds a warning to this report's warning list. + """ + self.encountered_warnings.append(warning) From eac2e341612461987d37314110c3f4c7640499f3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 7 Jan 2022 23:32:31 +0100 Subject: [PATCH 039/224] Fix is_logged_in for ILIAS 7 --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c6115f4..c5b2953 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -611,9 +611,10 @@ instance's greatest bottleneck. @staticmethod def _is_logged_in(soup: BeautifulSoup) -> bool: # Normal ILIAS pages - userlog = soup.find("li", {"id": "userlog"}) - if userlog is not None: - return True + mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") + if mainbar is not None: + login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) + return not login_button # Video listing embeds do not have complete ILIAS html. 
Try to match them by # their video listing table video_table = soup.find( From a99356f2a2d403ffb40f47bb159707d73e55a0e3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 00:27:34 +0100 Subject: [PATCH 040/224] Fix video stream extraction --- PFERD/crawl/ilias/kit_ilias_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d8c347d..ece88c5 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -133,7 +133,7 @@ class IliasPage: # parse it json_object = json.loads(json_str) - streams = [stream for stream in json_object["streams"] if stream["type"] == "video"] + streams = [stream for stream in json_object["streams"]] # and just fetch the lone video url! if len(streams) == 1: From 462d993fbc00602b4952d675fa4c77e5372c27fa Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 00:27:48 +0100 Subject: [PATCH 041/224] Fix local video path cache (hopefully) --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c5b2953..5d44566 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -489,7 +489,10 @@ instance's greatest bottleneck. 
log.explain_topic(f"Checking local cache for video {video_path.name}") all_found_locally = True for video in contained_videos: - all_found_locally = all_found_locally and self._output_dir.resolve(video).exists() + transformed_path = self._transformer.transform(video) + if transformed_path: + exists_locally = self._output_dir.resolve(transformed_path).exists() + all_found_locally = all_found_locally and exists_locally if all_found_locally: log.explain("Found all videos locally, skipping enumeration request") return True @@ -515,8 +518,12 @@ instance's greatest bottleneck. log.explain(f"Using single video mode for {element.name}") stream_element = stream_elements[0] + transformed_path = self._transformer.transform(original_path) + if not transformed_path: + raise CrawlError(f"Download returned a path but transform did not for {original_path}") + # We do not have a local cache yet - if self._output_dir.resolve(original_path).exists(): + if self._output_dir.resolve(transformed_path).exists(): log.explain(f"Video for {element.name} existed locally") else: await self._stream_from_url(stream_element.url, sink, bar, is_video=True) @@ -526,8 +533,8 @@ instance's greatest bottleneck. 
contained_video_paths: List[str] = [] for stream_element in stream_elements: - contained_video_paths.append(stream_element.name) video_path = original_path.parent / stream_element.name + contained_video_paths.append(str(video_path)) maybe_dl = await self.download(video_path, mtime=element.mtime, redownload=Redownload.NEVER) if not maybe_dl: From 6f3cfd43969cdac557c4f2d38bd2b4f0ffd40721 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 16:58:15 +0100 Subject: [PATCH 042/224] Fix personal desktop crawling --- PFERD/crawl/ilias/kit_ilias_html.py | 61 ++++++++++++++++++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 9 +++- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index ece88c5..9c8ab95 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -39,7 +39,12 @@ class IliasPageElement: description: Optional[str] = None def id(self) -> str: - regexes = [r"eid=(?P[0-9a-z\-]+)", r"file_(?P\d+)", r"ref_id=(?P\d+)"] + regexes = [ + r"eid=(?P[0-9a-z\-]+)", + r"file_(?P\d+)", + r"ref_id=(?P\d+)", + r"target=[a-z]+_(?P\d+)" + ] for regex in regexes: if match := re.search(regex, self.url): @@ -71,6 +76,9 @@ class IliasPage: if self._is_exercise_file(): log.explain("Page is an exercise, searching for elements") return self._find_exercise_entries() + if self._is_personal_desktop(): + log.explain("Page is the personal desktop") + return self._find_personal_desktop_entries() log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() @@ -115,6 +123,9 @@ class IliasPage: return False + def _is_personal_desktop(self) -> bool: + return self._soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}) + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. 
The actual video stream url is nowhere @@ -149,6 +160,26 @@ class IliasPage: return items + def _find_personal_desktop_entries(self) -> List[IliasPageElement]: + items: List[IliasPageElement] = [] + + titles: List[Tag] = self._soup.select(".il-item-title") + for title in titles: + link = title.find("a") + name = _sanitize_path_name(link.text.strip()) + url = self._abs_url_from_link(link) + + type = self._find_type_from_link(name, link, url) + if not type: + _unexpected_html_warning() + log.warn_contd(f"Could not extract type for {link}") + continue + + log.explain(f"Found {name!r}") + items.append(IliasPageElement(type, url, name)) + + return items + def _find_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages # 1. The initial dummy page without any videos. This page contains the link to the listing @@ -551,9 +582,30 @@ class IliasPage: if "target=file_" in parsed_url.query: return IliasElementType.FILE + if "target=grp_" in parsed_url.query: + return IliasElementType.FOLDER + + if "target=crs_" in parsed_url.query: + return IliasElementType.FOLDER + + if "baseClass=ilExerciseHandlerGUI" in parsed_url.query: + return IliasElementType.EXERCISE + + if "baseClass=ilLinkResourceHandlerGUI" in parsed_url.query and "calldirectlink" in parsed_url.query: + return IliasElementType.LINK + + if "cmd=showThreads" in parsed_url.query or "target=frm_" in parsed_url.query: + return IliasElementType.FORUM + + if "cmdClass=ilobjtestgui" in parsed_url.query: + return IliasElementType.TEST + + # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so + # try to guess it from the image. 
+ # Everything with a ref_id can *probably* be opened to reveal nested things # video groups, directories, exercises, etc - if "ref_id=" in parsed_url.query: + if "ref_id=" in parsed_url.query or "goto.php" in parsed_url.path: return IliasPage._find_type_from_folder_like(link_element, url) _unexpected_html_warning() @@ -574,7 +626,7 @@ class IliasPage: # We look for the outer div of our inner link, to find information around it # (mostly the icon) for parent in link_element.parents: - if "ilContainerListItemOuter" in parent["class"]: + if "ilContainerListItemOuter" in parent["class"] or "il-std-item" in parent["class"]: found_parent = parent break @@ -586,6 +638,9 @@ class IliasPage: # Find the small descriptive icon to figure out the type img_tag: Optional[Tag] = found_parent.select_one("img.ilListItemIcon") + if img_tag is None: + img_tag = found_parent.select_one("img.icon") + if img_tag is None: _unexpected_html_warning() log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 5d44566..99d6cf6 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -203,7 +203,9 @@ instance's greatest bottleneck. await self._crawl_url(root_url, expected_id=course_id) async def _crawl_desktop(self) -> None: - await self._crawl_url(self._base_url) + appendix = r"ILIAS\PersonalDesktop\PDMainBarProvider|mm_pd_sel_items" + appendix = appendix.encode("ASCII").hex() + await self._crawl_url(self._base_url + "/gs_content.php?item=" + appendix) async def _crawl_url(self, url: str, expected_id: Optional[int] = None) -> None: maybe_cl = await self.crawl(PurePath(".")) @@ -622,6 +624,11 @@ instance's greatest bottleneck. 
if mainbar is not None: login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) return not login_button + + # Personal Desktop + if soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}): + return True + # Video listing embeds do not have complete ILIAS html. Try to match them by # their video listing table video_table = soup.find( From ced8b9a2d032e7e4956b331d4408cb4b0829c780 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 16:58:30 +0100 Subject: [PATCH 043/224] Fix some accordions --- PFERD/crawl/ilias/kit_ilias_html.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 9c8ab95..0a81222 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -428,7 +428,10 @@ class IliasPage: continue prev: Tag = parent.findPreviousSibling("div") if "ilContainerBlockHeader" in prev.get("class"): - found_titles.append(prev.find("h3").getText().strip()) + if prev.find("h3"): + found_titles.append(prev.find("h3").getText().strip()) + else: + found_titles.append(prev.find("h2").getText().strip()) # And this for real accordions if "il_VAccordionContentDef" in parent.get("class"): From 5f527bc697b58512520f4d8ff93b856ff3a345b1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 17:14:40 +0100 Subject: [PATCH 044/224] Remove Python 3.9 Pattern typehints --- PFERD/crawl/ilias/kit_ilias_html.py | 2 +- PFERD/crawl/kit_ipd_crawler.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 0a81222..78bedbf 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -132,7 +132,7 @@ class IliasPage: # on the page, but defined in a JS object inside a script tag, passed to the player # library. 
# We do the impossible and RegEx the stream JSON object out of the page's HTML source - regex: re.Pattern[str] = re.compile( + regex = re.compile( r"({\"streams\"[\s\S]+?),\s*{\"paella_config_file", re.IGNORECASE ) json_match = regex.search(str(self._soup)) diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 76145b4..1a5314b 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -2,8 +2,7 @@ import os import re from dataclasses import dataclass from pathlib import PurePath -from re import Pattern -from typing import Awaitable, List, Optional, Set, Union +from typing import Awaitable, List, Optional, Pattern, Set, Union from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag From e32c1f000fb9abcc47f8dc127b4d674acfa1662c Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 18:05:48 +0100 Subject: [PATCH 045/224] Fix mtime for single streams --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 99d6cf6..c4e70c0 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -461,7 +461,7 @@ instance's greatest bottleneck. # is re-used if the video consists of a single stream. In that case the # file name is used and *not* the stream name the ilias html parser reported # to ensure backwards compatibility. 
- maybe_dl = await self.download(element_path, redownload=Redownload.ALWAYS) + maybe_dl = await self.download(element_path, mtime=element.mtime, redownload=Redownload.ALWAYS) # If we do not want to crawl it (user filter) or we have every file # from the cached mapping already, we can ignore this and bail From eb4de8ae0cc37e38e9fa801f729e68d1f71a0bb0 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 18:14:43 +0100 Subject: [PATCH 046/224] Ignore 1970 dates as windows crashes when calling .timestamp() --- PFERD/output_dir.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 0fb9911..e612267 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -231,7 +231,8 @@ class OutputDirectory: stat = local_path.stat() remote_newer = None - if mtime := heuristics.mtime: + if heuristics.mtime and heuristics.mtime.year > 1970: + mtime = heuristics.mtime remote_newer = mtime.timestamp() > stat.st_mtime if remote_newer: log.explain("Remote file seems to be newer") From 43c5453e100aedede844a242721d2990845c2c26 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 19:59:42 +0100 Subject: [PATCH 047/224] Correctly crawl files on desktop The files on the desktop do not include a download link, so we need to rewrite it. 
--- PFERD/crawl/ilias/kit_ilias_html.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 78bedbf..cee0555 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -176,6 +176,11 @@ class IliasPage: continue log.explain(f"Found {name!r}") + + if type == IliasElementType.FILE and "_download" not in url: + url = re.sub(r"(target=file_\d+)", r"\1_download", url) + log.explain("Rewired file URL to include download part") + items.append(IliasPageElement(type, url, name)) return items From 10d9d7452809aafe4f406f894944a078072f16bf Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 20:28:30 +0100 Subject: [PATCH 048/224] Bail out when crawling recursive courses --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c4e70c0..8f78e7a 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -182,6 +182,7 @@ instance's greatest bottleneck. self._link_file_redirect_delay = section.link_redirect_delay() self._links = section.links() self._videos = section.videos() + self._visited_urls: Set[str] = set() async def _run(self) -> None: if isinstance(self._target, int): @@ -309,6 +310,12 @@ instance's greatest bottleneck. parent_path: PurePath, element: IliasPageElement, ) -> Optional[Awaitable[None]]: + if element.url in self._visited_urls: + raise CrawlWarning( + f"Found second path to element {element.name!r} at {element.url!r}. 
Aborting subpath" + ) + self._visited_urls.add(element.url) + element_path = PurePath(parent_path, element.name) if element.type in _VIDEO_ELEMENTS: From d30f25ee9788d3363544ba9779cabf157dba3b98 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 20:28:45 +0100 Subject: [PATCH 049/224] Detect shib login page as login page And do not assume we are logged in... --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 8f78e7a..c3b2342 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -630,7 +630,8 @@ instance's greatest bottleneck. mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") if mainbar is not None: login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) - return not login_button + shib_login = soup.find(id="button_shib_login") + return not login_button and not shib_login # Personal Desktop if soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}): From 4ee919625da8d3d04cbb889e24d05b1c09436fe8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 8 Jan 2022 20:47:35 +0100 Subject: [PATCH 050/224] Add rudimentary support for content pages --- PFERD/crawl/ilias/kit_ilias_html.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index cee0555..754af16 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -77,8 +77,11 @@ class IliasPage: log.explain("Page is an exercise, searching for elements") return self._find_exercise_entries() if self._is_personal_desktop(): - log.explain("Page is the personal desktop") + log.explain("Page is the personal desktop, searching for elements") return 
self._find_personal_desktop_entries() + if self._is_content_page(): + log.explain("Page is a content page, searching for elements") + return self._find_copa_entries() log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() @@ -126,6 +129,12 @@ class IliasPage: def _is_personal_desktop(self) -> bool: return self._soup.find("a", attrs={"href": lambda x: x and "block_type=pditems" in x}) + def _is_content_page(self) -> bool: + link = self._soup.find(id="current_perma_link") + if not link: + return False + return "target=copa_" in link.get("value") + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. The actual video stream url is nowhere @@ -185,6 +194,23 @@ class IliasPage: return items + def _find_copa_entries(self) -> List[IliasPageElement]: + items: List[IliasPageElement] = [] + links: List[Tag] = self._soup.findAll(class_="ilc_flist_a_FileListItemLink") + + for link in links: + url = self._abs_url_from_link(link) + name = _sanitize_path_name(link.getText().strip().replace("\t", "")) + + if "file_id" not in url: + _unexpected_html_warning() + log.warn_contd(f"Found unknown content page item {name!r} with url {url!r}") + continue + + items.append(IliasPageElement(IliasElementType.FILE, url, name)) + + return items + def _find_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages # 1. The initial dummy page without any videos. 
This page contains the link to the listing From 4bf0c972e6e37afc7f9688104082189f5f78d390 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 11:47:59 +0100 Subject: [PATCH 051/224] Update types for rich 11 --- PFERD/logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/logging.py b/PFERD/logging.py index 32e5268..e2d64fc 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -5,7 +5,7 @@ from contextlib import asynccontextmanager, contextmanager # TODO In Python 3.9 and above, ContextManager is deprecated from typing import AsyncIterator, ContextManager, Iterator, List, Optional -from rich.console import Console, RenderGroup +from rich.console import Console, Group from rich.live import Live from rich.markup import escape from rich.panel import Panel @@ -68,7 +68,7 @@ class Log: if self._download_progress.task_ids: elements.append(self._download_progress) - group = RenderGroup(*elements) # type: ignore + group = Group(*elements) # type: ignore self._live.update(group) @contextmanager From e9d2d0503001728f6c1f313982d8843d83405e3d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 11:39:42 +0100 Subject: [PATCH 052/224] Update changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index faa2507..1b392c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,10 +26,16 @@ ambiguous situations. 
- A KIT IPD crawler - Support for ILIAS cards - Support for multi-stream videos +- Support for ILIAS 7 ### Removed - [Interpolation](https://docs.python.org/3/library/configparser.html#interpolation-of-values) in config file +### Fixed +- Crawling of recursive courses +- Crawling files directly placed on the personal desktop +- Ignore timestamps at the unix epoch as they crash on windows + ## 3.2.0 - 2021-08-04 ### Added From e467b38d739347d62cbb122d9f4752abe823b423 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 18:23:00 +0100 Subject: [PATCH 053/224] Only reject 1970 timestamps on windows --- PFERD/output_dir.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index e612267..441717b 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -231,7 +231,9 @@ class OutputDirectory: stat = local_path.stat() remote_newer = None - if heuristics.mtime and heuristics.mtime.year > 1970: + + # Python on Windows crashes when faced with timestamps around the unix epoch + if heuristics.mtime and (os.name != "nt" or heuristics.mtime.year > 1970): mtime = heuristics.mtime remote_newer = mtime.timestamp() > stat.st_mtime if remote_newer: From 33453ede2d63b15bcca2ce541af2299440bfa8ff Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 18:31:42 +0100 Subject: [PATCH 054/224] Update dependency versions in setup.py --- setup.cfg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/setup.cfg b/setup.cfg index 5758282..059798a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,11 +6,11 @@ version = attr: PFERD.version.VERSION packages = find: python_requires = >=3.8 install_requires = - aiohttp>=3.7.4.post0 - beautifulsoup4>=4.9.3 - rich>=10.1.0 - keyring>=23.0.1 - certifi>=2020.12.5 + aiohttp>=3.8.1 + beautifulsoup4>=4.10.0 + rich>=11.0.0 + keyring>=23.5.0 + certifi>=2021.10.8 [options.entry_points] console_scripts = From 9618aae83bf10b8e517c53a53c47d14dd707c707 Mon 
Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 9 Jan 2022 18:32:58 +0100 Subject: [PATCH 055/224] Add content pages to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b392c1..6e4c7e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Added - A KIT IPD crawler - Support for ILIAS cards +- (Rudimentary) support for content pages - Support for multi-stream videos - Support for ILIAS 7 From 0045124a4e2851d4d1d84bc7c2b68c75f49d5375 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 9 Jan 2022 21:09:09 +0100 Subject: [PATCH 056/224] Bump version to 3.3.0 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e4c7e9..132351b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.3.0 - 2022-01-09 + ### Added - A KIT IPD crawler - Support for ILIAS cards diff --git a/PFERD/version.py b/PFERD/version.py index b8efadd..ca58f3a 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.2.0" +VERSION = "3.3.0" From 57ec51e95a238960d1832ba0ad85b2ff6ec1de3b Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 14 Jan 2022 20:15:19 +0100 Subject: [PATCH 057/224] Fix login after shib url parser change --- CHANGELOG.md | 4 +++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 39 +++++++++++++++++++--- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 132351b..41ee3d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,10 @@ ambiguous situations. ## Unreleased +### Fixed +- Shibboleth login fixed. It was broken due to URL parser changes and really + *unfortunate* behaviour by aiohttp. 
+ ## 3.3.0 - 2022-01-09 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c3b2342..c26ce8b 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -4,6 +4,7 @@ from pathlib import PurePath from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, TypeVar, Union, cast import aiohttp +import yarl from aiohttp import hdrs from bs4 import BeautifulSoup, Tag @@ -674,14 +675,14 @@ class KitShibbolethLogin: # Equivalent: Click on "Mit KIT-Account anmelden" button in # https://ilias.studium.kit.edu/login.php - url = "https://ilias.studium.kit.edu/Shibboleth.sso/Login" + url = "https://ilias.studium.kit.edu/shib_login.php" data = { "sendLogin": "1", "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", - "target": "/shib_login.php", - "home_organization_selection": "Mit KIT-Account anmelden", + "il_target": "", + "home_organization_selection": "Weiter", } - soup: BeautifulSoup = await _post(sess, url, data) + soup: BeautifulSoup = await _shib_post(sess, url, data) # Attempt to login using credentials, if necessary while not self._login_successful(soup): @@ -761,3 +762,33 @@ class KitShibbolethLogin: async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: async with session.post(url, data=data) as response: return soupify(await response.read()) + + +async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: + """ + aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected + by Shibboleth. Thanks a lot. So now we unroll the requests manually, parse location headers and + build encoded URL objects ourselfs... Who thought mangling location header was a good idea?? 
+ """ + async with session.post(url, data=data, allow_redirects=False) as response: + location = response.headers.get("location") + if not location: + raise CrawlWarning(f"Login failed, no location header present at {url}") + correct_url = yarl.URL(location, encoded=True) + + async with session.get(correct_url, allow_redirects=False) as response: + as_yarl = yarl.URL(response.url) + location = response.headers.get("location") + + if not location or not as_yarl.host: + raise CrawlWarning(f"Login failed, no location header present at {correct_url}") + + correct_url = yarl.URL.build( + scheme=as_yarl.scheme, + host=as_yarl.host, + path=location, + encoded=True + ) + + async with session.get(correct_url, allow_redirects=False) as response: + return soupify(await response.read()) From f47e7374d23b71396b511ee7b57f59d46c34e00d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 14 Jan 2022 22:01:45 +0100 Subject: [PATCH 058/224] Use fixed windows path for video cache --- CHANGELOG.md | 4 +++- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 9 +++++++-- PFERD/deduplicator.py | 6 ++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41ee3d5..7f35a90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,8 +23,10 @@ ambiguous situations. ## Unreleased ### Fixed -- Shibboleth login fixed. It was broken due to URL parser changes and really +- Shibboleth login. It was broken due to URL parser changes and really *unfortunate* behaviour by aiohttp. +- local video cache on windows if the path was changed to accomodate windows + file system limitations (e.g. replace `:`) ## 3.3.0 - 2022-01-09 diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c26ce8b..b197b6b 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -499,7 +499,7 @@ instance's greatest bottleneck. 
log.explain_topic(f"Checking local cache for video {video_path.name}") all_found_locally = True for video in contained_videos: - transformed_path = self._transformer.transform(video) + transformed_path = self._to_local_video_path(video) if transformed_path: exists_locally = self._output_dir.resolve(transformed_path).exists() all_found_locally = all_found_locally and exists_locally @@ -509,6 +509,11 @@ instance's greatest bottleneck. log.explain("Missing at least one video, continuing with requests!") return False + def _to_local_video_path(self, path: PurePath) -> Optional[PurePath]: + if transformed := self._transformer.transform(path): + return self._deduplicator.fixup_path(transformed) + return None + @anoncritical @_iorepeat(3, "downloading video") async def _download_video( @@ -528,7 +533,7 @@ instance's greatest bottleneck. log.explain(f"Using single video mode for {element.name}") stream_element = stream_elements[0] - transformed_path = self._transformer.transform(original_path) + transformed_path = self._to_local_video_path(original_path) if not transformed_path: raise CrawlError(f"Download returned a path but transform did not for {original_path}") diff --git a/PFERD/deduplicator.py b/PFERD/deduplicator.py index ef62dcb..7777f28 100644 --- a/PFERD/deduplicator.py +++ b/PFERD/deduplicator.py @@ -56,6 +56,12 @@ class Deduplicator: log.explain(f"Changed path to {fmt_path(new_path)} for windows compatibility") return new_path + def fixup_path(self, path: PurePath) -> PurePath: + """Fixes up the path for windows, if enabled. 
Returns the path unchanged otherwise.""" + if self._windows_paths: + return self._fixup_for_windows(path) + return path + def mark(self, path: PurePath) -> PurePath: if self._windows_paths: path = self._fixup_for_windows(path) From 4f022e2d192552ddef22b169044f2692bc4e1563 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 Jan 2022 15:06:02 +0100 Subject: [PATCH 059/224] Reword changelog --- CHANGELOG.md | 6 ++---- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f35a90..76cf836 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,10 +23,8 @@ ambiguous situations. ## Unreleased ### Fixed -- Shibboleth login. It was broken due to URL parser changes and really - *unfortunate* behaviour by aiohttp. -- local video cache on windows if the path was changed to accomodate windows - file system limitations (e.g. replace `:`) +- ILIAS login +- Local video cache if `windows_paths` is enabled ## 3.3.0 - 2022-01-09 diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index b197b6b..a3e37a9 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -773,7 +773,7 @@ async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> Bea """ aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected by Shibboleth. Thanks a lot. So now we unroll the requests manually, parse location headers and - build encoded URL objects ourselfs... Who thought mangling location header was a good idea?? + build encoded URL objects ourselves... Who thought mangling location header was a good idea?? 
""" async with session.post(url, data=data, allow_redirects=False) as response: location = response.headers.get("location") From 86947e4874f0853444e38de0fac4d2ddab5ae41e Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 Jan 2022 15:11:22 +0100 Subject: [PATCH 060/224] Bump version to 3.3.1 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76cf836..d5f9dc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.3.1 - 2022-01-15 + ### Fixed - ILIAS login - Local video cache if `windows_paths` is enabled diff --git a/PFERD/version.py b/PFERD/version.py index ca58f3a..37e91f3 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.3.0" +VERSION = "3.3.1" From 7872fe5221c4c8b95b59ffe54f879c1c39e736f3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 18 Jan 2022 22:32:43 +0100 Subject: [PATCH 061/224] Fix tables with more columns than expected --- PFERD/crawl/ilias/kit_ilias_html.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 754af16..94b2e4b 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -280,11 +280,22 @@ class IliasPage: def _listed_video_to_element(self, link: Tag) -> IliasPageElement: # The link is part of a table with multiple columns, describing metadata. - # 6th child (1 indexed) is the modification time string - modification_string = link.parent.parent.parent.select_one( - "td.std:nth-child(6)" - ).getText().strip() - modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") + # 6th or 7th child (1 indexed) is the modification time string. 
Try to find it + # by parsing backwards from the end and finding something that looks like a date + modification_time = None + row: Tag = link.parent.parent.parent + column_count = len(row.select("td.std")) + for index in range(column_count, 0, -1): + modification_string = link.parent.parent.parent.select_one( + f"td.std:nth-child({index})" + ).getText().strip() + if re.search(r"\d+\.\d+.\d+ - \d+:\d+", modification_string): + modification_time = datetime.strptime(modification_string, "%d.%m.%Y - %H:%M") + break + + if modification_time is None: + log.warn(f"Could not determine upload time for {link}") + modification_time = datetime.now() title = link.parent.parent.parent.select_one("td.std:nth-child(3)").getText().strip() title += ".mp4" From 86e2e226dcefb98232410cc2289d11a664076adc Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 3 Apr 2022 11:32:38 +0200 Subject: [PATCH 062/224] Notify user when shibboleth presents new entitlements --- CHANGELOG.md | 2 ++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d5f9dc6..4e11224 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ ambiguous situations. ### Fixed - ILIAS login - Local video cache if `windows_paths` is enabled +- Report when Shibboleth reviews entitlements +- Support for video listings with more columns ## 3.3.0 - 2022-01-09 diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index a3e37a9..2a5fc87 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -710,6 +710,12 @@ class KitShibbolethLogin: } soup = await _post(sess, url, data) + if soup.find(id="attributeRelease"): + raise CrawlError( + "ILIAS Shibboleth entitlements changed! 
" + "Please log in once in your browser and review them" + ) + if self._tfa_required(soup): soup = await self._authenticate_tfa(sess, soup) From da72863b471c048768a0d8234ba02298b1f9e4c1 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 3 Apr 2022 13:19:08 +0200 Subject: [PATCH 063/224] Placate newer mypy --- PFERD/logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/logging.py b/PFERD/logging.py index e2d64fc..e833716 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -68,7 +68,7 @@ class Log: if self._download_progress.task_ids: elements.append(self._download_progress) - group = Group(*elements) # type: ignore + group = Group(*elements) self._live.update(group) @contextmanager From a2831fbea2e8758686677c44645fdd6f3cbc40fa Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 13:55:24 +0200 Subject: [PATCH 064/224] Fix shib authentication Authentication failed previously if the shib session was still valid. If Shibboleth gets a request and the session is still valid, it directly responds without a second redirect. 
--- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 2a5fc87..571e4d7 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -784,15 +784,19 @@ async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> Bea async with session.post(url, data=data, allow_redirects=False) as response: location = response.headers.get("location") if not location: - raise CrawlWarning(f"Login failed, no location header present at {url}") + raise CrawlWarning(f"Login failed (1), no location header present at {url}") correct_url = yarl.URL(location, encoded=True) async with session.get(correct_url, allow_redirects=False) as response: - as_yarl = yarl.URL(response.url) location = response.headers.get("location") + # If shib still still has a valid session, it will directly respond to the request + if location is None: + return soupify(await response.read()) + as_yarl = yarl.URL(response.url) + # Probably not needed anymore, but might catch a few weird situations with a nicer message if not location or not as_yarl.host: - raise CrawlWarning(f"Login failed, no location header present at {correct_url}") + raise CrawlWarning(f"Login failed (2), no location header present at {correct_url}") correct_url = yarl.URL.build( scheme=as_yarl.scheme, From f17b9b68f4cdc397b029361260d35aad7e778308 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 14:01:40 +0200 Subject: [PATCH 065/224] Add shibboleth authentication fix to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e11224..b3da789 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ ambiguous situations. 
- Local video cache if `windows_paths` is enabled - Report when Shibboleth reviews entitlements - Support for video listings with more columns +- Authentication when the shib session is still valid ## 3.3.0 - 2022-01-09 From 07a21f80a63dfd4f47dae4dadc8e515334a9891d Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 21:15:33 +0200 Subject: [PATCH 066/224] Link to unofficial packages --- CHANGELOG.md | 3 +++ README.md | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3da789..c64b69a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Added +- Links to unofficial packages and repology in the readme + ## 3.3.1 - 2022-01-15 ### Fixed diff --git a/README.md b/README.md index 836147f..b8b2551 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,14 @@ $ pip install --upgrade git+https://github.com/Garmelon/PFERD@latest The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. +### With package managers + +Unofficial packages are available for: +- [AUR](https://aur.archlinux.org/packages/pferd) +- [nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/pferd/default.nix) + +See also PFERD's [repology page](https://repology.org/project/pferd/versions). + ## Basic usage PFERD can be run directly from the command line with no config file. Run `pferd From ba3d299c05bae299a3da5c378e9c5f311e78f62f Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 21:23:55 +0200 Subject: [PATCH 067/224] Fix changelog --- CHANGELOG.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c64b69a..c5480f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,16 +23,18 @@ ambiguous situations. 
## Unreleased ### Added +- Message when Shibboleth entitlements need to be manually reviewed +- Support for video listings with more columns - Links to unofficial packages and repology in the readme +### Fixed +- Crash during authentication when the Shibboleth session is still valid + ## 3.3.1 - 2022-01-15 ### Fixed - ILIAS login - Local video cache if `windows_paths` is enabled -- Report when Shibboleth reviews entitlements -- Support for video listings with more columns -- Authentication when the shib session is still valid ## 3.3.0 - 2022-01-09 From a99ddaa0cc28e04edfc95d541f0b1f6ca885965c Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 21:47:51 +0200 Subject: [PATCH 068/224] Read and write config in UTF-8 --- PFERD/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PFERD/config.py b/PFERD/config.py index 0ea7abc..5635573 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -120,7 +120,7 @@ class Config: # Using config.read_file instead of config.read because config.read # would just ignore a missing file and carry on. 
try: - with open(path) as f: + with open(path, encoding="utf-8") as f: parser.read_file(f, source=str(path)) except FileNotFoundError: raise ConfigLoadError(path, "File does not exist") @@ -154,12 +154,12 @@ class Config: try: # x = open for exclusive creation, failing if the file already # exists - with open(path, "x") as f: + with open(path, "x", encoding="utf-8") as f: self._parser.write(f) except FileExistsError: print("That file already exists.") if asyncio.run(prompt_yes_no("Overwrite it?", default=False)): - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: self._parser.write(f) else: raise ConfigDumpError(path, "File already exists") From a709280cbf0bf5dbb62507f9829647862ef5f6bc Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 21:48:09 +0200 Subject: [PATCH 069/224] Try to detect unsupported config file encoding The encoding detection is quite rudimentary, but should detect the default windows encoding in many cases. --- PFERD/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PFERD/config.py b/PFERD/config.py index 5635573..8f7e682 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -128,6 +128,8 @@ class Config: raise ConfigLoadError(path, "That's a directory, not a file") except PermissionError: raise ConfigLoadError(path, "Insufficient permissions") + except UnicodeDecodeError: + raise ConfigLoadError(path, "File is not encoded using UTF-8") def dump(self, path: Optional[Path] = None) -> None: """ From 00db34821825a719712f6bc25420bdfaed9bda11 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 27 Apr 2022 21:53:29 +0200 Subject: [PATCH 070/224] Update changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5480f2..e70d328 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,9 +24,12 @@ ambiguous situations. 
### Added - Message when Shibboleth entitlements need to be manually reviewed -- Support for video listings with more columns - Links to unofficial packages and repology in the readme +### Changed +- Support video listings with more columns +- Use UTF-8 when reading/writing the config file + ### Fixed - Crash during authentication when the Shibboleth session is still valid From 31631fb409d80f7c0cf8dd964da993ef08aa6fe5 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 22:16:47 +0200 Subject: [PATCH 071/224] Increase minimum python version to 3.9 --- .github/workflows/build-and-release.yml | 2 +- CHANGELOG.md | 1 + README.md | 2 +- setup.cfg | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index 565c4e3..090ac7e 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python: ["3.8"] + python: ["3.9"] steps: - uses: actions/checkout@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index e70d328..7cee430 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ ambiguous situations. - Links to unofficial packages and repology in the readme ### Changed +- Increase minimum supported Python version to 3.9 - Support video listings with more columns - Use UTF-8 when reading/writing the config file diff --git a/README.md b/README.md index b8b2551..ce917b0 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Binaries for Linux, Windows and Mac can be downloaded directly from the ### With pip -Ensure you have at least Python 3.8 installed. Run the following command to +Ensure you have at least Python 3.9 installed. 
Run the following command to install PFERD or upgrade it to the latest version: ``` diff --git a/setup.cfg b/setup.cfg index 059798a..2378c48 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ version = attr: PFERD.version.VERSION [options] packages = find: -python_requires = >=3.8 +python_requires = >=3.9 install_requires = aiohttp>=3.8.1 beautifulsoup4>=4.10.0 From 602044ff1b0b49348a50248f7f93334df979044a Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 22:50:06 +0200 Subject: [PATCH 072/224] Fix mypy errors and add missing await --- PFERD/crawl/crawler.py | 5 +++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 21 ++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/PFERD/crawl/crawler.py b/PFERD/crawl/crawler.py index 53f43e9..0e67c02 100644 --- a/PFERD/crawl/crawler.py +++ b/PFERD/crawl/crawler.py @@ -1,9 +1,10 @@ import asyncio import os from abc import ABC, abstractmethod +from collections.abc import Awaitable, Coroutine from datetime import datetime from pathlib import Path, PurePath -from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar +from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, TypeVar from ..auth import Authenticator from ..config import Config, Section @@ -58,7 +59,7 @@ def noncritical(f: Wrapped) -> Wrapped: return wrapper # type: ignore -AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) +AWrapped = TypeVar("AWrapped", bound=Callable[..., Coroutine[Any, Any, Optional[Any]]]) def anoncritical(f: AWrapped) -> AWrapped: diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 571e4d7..ae9ebd4 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -1,7 +1,8 @@ import asyncio import re +from collections.abc import Awaitable, Coroutine from pathlib import PurePath -from typing import Any, Awaitable, 
Callable, Dict, List, Optional, Set, TypeVar, Union, cast +from typing import Any, Callable, Dict, List, Optional, Set, Union, cast import aiohttp import yarl @@ -13,7 +14,7 @@ from ...config import Config from ...logging import ProgressBar, log from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param -from ..crawler import CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical +from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement @@ -82,8 +83,6 @@ _VIDEO_ELEMENTS: Set[IliasElementType] = set([ IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, ]) -AWrapped = TypeVar("AWrapped", bound=Callable[..., Awaitable[Optional[Any]]]) - def _iorepeat(attempts: int, name: str, failure_is_error: bool = False) -> Callable[[AWrapped], AWrapped]: def decorator(f: AWrapped) -> AWrapped: @@ -252,7 +251,7 @@ instance's greatest bottleneck. url: str, parent: IliasPageElement, path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: maybe_cl = await self.crawl(path) if not maybe_cl: return None @@ -310,7 +309,7 @@ instance's greatest bottleneck. self, parent_path: PurePath, element: IliasPageElement, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: if element.url in self._visited_urls: raise CrawlWarning( f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath" @@ -360,7 +359,7 @@ instance's greatest bottleneck. self, element: IliasPageElement, element_path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: log.explain_topic(f"Decision: Crawl Link {fmt_path(element_path)}") log.explain(f"Links type is {self._links}") @@ -407,7 +406,7 @@ instance's greatest bottleneck. 
self, element: IliasPageElement, element_path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: log.explain_topic(f"Decision: Crawl Booking Link {fmt_path(element_path)}") log.explain(f"Links type is {self._links}") @@ -443,7 +442,7 @@ instance's greatest bottleneck. if hdrs.LOCATION not in resp.headers: return soupify(await resp.read()).select_one("a").get("href").strip() - self._authenticate() + await self._authenticate() async with self.session.get(export_url, allow_redirects=False) as resp: # No redirect means we were authenticated @@ -456,7 +455,7 @@ instance's greatest bottleneck. self, element: IliasPageElement, element_path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: # Copy old mapping as it is likely still relevant if self.prev_report: self.report.add_custom_value( @@ -564,7 +563,7 @@ instance's greatest bottleneck. self, element: IliasPageElement, element_path: PurePath, - ) -> Optional[Awaitable[None]]: + ) -> Optional[Coroutine[Any, Any, None]]: maybe_dl = await self.download(element_path, mtime=element.mtime) if not maybe_dl: return None From d2e6d918806310a3bcda7a82c74853b7f59eb99f Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 27 Apr 2022 22:50:36 +0200 Subject: [PATCH 073/224] Make PFERD executable via python -m --- PFERD/__main__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/PFERD/__main__.py b/PFERD/__main__.py index bdf5b34..4faeb13 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -159,3 +159,7 @@ def main() -> None: sys.exit(1) else: pferd.print_report() + + +if __name__ == "__main__": + main() From aa74604d293ec25ae7f94431d4431313dabfc26c Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 29 Apr 2022 23:11:27 +0200 Subject: [PATCH 074/224] Use utf-8 for report --- PFERD/output_dir.py | 2 +- PFERD/report.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 441717b..c92f4a6 
100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -503,7 +503,7 @@ class OutputDirectory: try: self._prev_report = Report.load(self._report_path) log.explain("Loaded report successfully") - except (OSError, json.JSONDecodeError, ReportLoadError) as e: + except (OSError, UnicodeDecodeError, json.JSONDecodeError, ReportLoadError) as e: log.explain("Failed to load report") log.explain(str(e)) diff --git a/PFERD/report.py b/PFERD/report.py index 0e0c789..0eaaca9 100644 --- a/PFERD/report.py +++ b/PFERD/report.py @@ -100,10 +100,10 @@ class Report: @classmethod def load(cls, path: Path) -> "Report": """ - May raise OSError, JsonDecodeError, ReportLoadError. + May raise OSError, UnicodeDecodeError, JsonDecodeError, ReportLoadError. """ - with open(path) as f: + with open(path, encoding="utf-8") as f: data = json.load(f) if not isinstance(data, dict): @@ -148,7 +148,7 @@ class Report: "encountered_errors": self.encountered_errors, } - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: json.dump(data, f, indent=2, sort_keys=True) f.write("\n") # json.dump doesn't do this From b56475450de9a00a0ab12bfdf9adf9b5b229f38e Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 29 Apr 2022 23:12:41 +0200 Subject: [PATCH 075/224] Use utf-8 for cookies --- PFERD/crawl/http_crawler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PFERD/crawl/http_crawler.py b/PFERD/crawl/http_crawler.py index fa4cf29..44ec4dd 100644 --- a/PFERD/crawl/http_crawler.py +++ b/PFERD/crawl/http_crawler.py @@ -108,7 +108,7 @@ class HttpCrawler(Crawler): def _load_cookies_from_file(self, path: Path) -> None: jar: Any = http.cookies.SimpleCookie() - with open(path) as f: + with open(path, encoding="utf-8") as f: for i, line in enumerate(f): # Names of headers are case insensitive if line[:11].lower() == "set-cookie:": @@ -121,7 +121,7 @@ class HttpCrawler(Crawler): jar: Any = http.cookies.SimpleCookie() for morsel in self._cookie_jar: jar[morsel.key] = 
morsel - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: f.write(jar.output(sep="\n")) f.write("\n") # A trailing newline is just common courtesy From a8f76e9be76f4bb0ee24030ea252354ede1c8ce4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 29 Apr 2022 23:15:12 +0200 Subject: [PATCH 076/224] Use utf-8 for credential file --- PFERD/auth/credential_file.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PFERD/auth/credential_file.py b/PFERD/auth/credential_file.py index d0fcdda..94ffa73 100644 --- a/PFERD/auth/credential_file.py +++ b/PFERD/auth/credential_file.py @@ -20,8 +20,10 @@ class CredentialFileAuthenticator(Authenticator): path = config.default_section.working_dir() / section.path() try: - with open(path) as f: + with open(path, encoding="utf-8") as f: lines = list(f) + except UnicodeDecodeError: + raise AuthLoadError(f"Credential file at {fmt_real_path(path)} is not encoded using UTF-8") except OSError as e: raise AuthLoadError(f"No credential file at {fmt_real_path(path)}") from e From a241672726529d1a0ed852b1db2df7968ee6f137 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 1 May 2022 22:29:06 +0200 Subject: [PATCH 077/224] Bump version to 3.4.0 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cee430..310059a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. 
## Unreleased +## 3.4.0 - 2022-05-01 + ### Added - Message when Shibboleth entitlements need to be manually reviewed - Links to unofficial packages and repology in the readme diff --git a/PFERD/version.py b/PFERD/version.py index 37e91f3..8102d37 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.3.1" +VERSION = "3.4.0" From b8fe25c580a8cafc14c32890f0635c7daecafc4d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 May 2022 14:13:39 +0200 Subject: [PATCH 078/224] Add `.cpp` to ipd link regex --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 310059a..22fdd29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Changed +- Add `.cpp` to IPD link regex + ## 3.4.0 - 2022-05-01 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 1a5314b..e5ec58f 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -27,7 +27,7 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target def link_regex(self) -> Pattern[str]: - regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|java)$") + regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$") return re.compile(regex) From afbd03f7774a1c0f22c471d98f995153bb08edcd Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:15:48 +0200 Subject: [PATCH 079/224] Fix docs --- CHANGELOG.md | 2 +- CONFIG.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22fdd29..f5af29d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ ambiguous situations. 
## Unreleased ### Changed -- Add `.cpp` to IPD link regex +- Add `cpp` extension to default `link_regex` of IPD crawler ## 3.4.0 - 2022-05-01 diff --git a/CONFIG.md b/CONFIG.md index 569780d..1355c34 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -146,7 +146,7 @@ requests is likely a good idea. - `target`: URL to a KIT-IPD page - `link_regex`: A regex that is matched against the `href` part of links. If it matches, the given link is downloaded as a file. This is used to extract - files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|java)$`) + files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$`) ### The `kit-ilias-web` crawler From bc3fa36637b5a4f4ea26db1a9437e4cbd5cad5c4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:20:45 +0200 Subject: [PATCH 080/224] Fix IPD crawler crashing on weird HTML comments --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5af29d..de7b795 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler +### Fixed +- IPD crawler crashes on some sites + ## 3.4.0 - 2022-05-01 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index e5ec58f..58e71f8 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -161,4 +161,10 @@ class KitIpdCrawler(HttpCrawler): async def get_page(self) -> BeautifulSoup: async with self.session.get(self._url) as request: - return soupify(await request.read()) + # The web page for Algorithmen für Routenplanung contains some + # weird comments that beautifulsoup doesn't parse correctly. This + # hack enables those pages to be crawled, and should hopefully not + # cause issues on other pages. 
+ content = (await request.read()).decode("utf-8") + content = re.sub(r"<!--.*?-->", "", content) + return soupify(content.encode("utf-8")) From af2cc1169ace7154349518f7f709023eeb76ba95 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:23:19 +0200 Subject: [PATCH 081/224] Mention href for users of link_regex option --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de7b795..959fda0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler +- Mention hrefs in IPD crawler for users of `link_regex` option ### Fixed - IPD crawler crashes on some sites diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 58e71f8..78fe0b1 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -45,7 +45,7 @@ class KitIpdFolder: def explain(self) -> None: log.explain_topic(f"Folder {self.name!r}") for file in self.files: - log.explain(f"File {file.name!r}") + log.explain(f"File {file.name!r} (href={file.url!r})") def __hash__(self) -> int: return self.name.__hash__() @@ -113,7 +113,7 @@ class KitIpdCrawler(HttpCrawler): else: file = self._extract_file(element) items.add(file) - log.explain_topic(f"Orphan file {file.name!r}") + log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") log.explain("Attributing it to root folder") return items From 694ffb4d7711265d768a636cf1843e302485c62d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:28:30 +0200 Subject: [PATCH 082/224] Fix meeting date parsing Apparently the new pattern "<date>: <time>," was added. This patch adds support for it.
--- PFERD/crawl/ilias/kit_ilias_html.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 94b2e4b..dfe111d 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -763,9 +763,14 @@ def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[dateti """ try: date_str = re.sub(r"\s+", " ", date_str) + date_str = re.sub("(Gestern|Yesterday):", "", date_str, re.I) + date_str = re.sub("(Heute|Today):", "", date_str, re.I) + date_str = re.sub("(Morgen|Tomorrow):", "", date_str, re.I) + date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I) + date_str = date_str.strip() for german, english in zip(german_months, english_months): date_str = date_str.replace(german, english) # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" From bcc537468c46088f78a037fb28364866e8653bb5 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:53:37 +0200 Subject: [PATCH 083/224] Fix crawling of expanded meetings The last meeting on every page is expanded by default. Its content is then shown inline *and* in the meeting page itself. We should skip the inline content. --- PFERD/crawl/ilias/kit_ilias_html.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index dfe111d..d93684c 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -428,6 +428,12 @@ class IliasPage: element_type = self._find_type_from_link(element_name, link, abs_url) description = self._find_link_description(link) + # The last meeting on every page is expanded by default. 
+ # Its content is then shown inline *and* in the meeting page itself. + # We should skip the inline content. + if element_type != IliasElementType.MEETING and self._is_in_expanded_meeting(link): + continue + if not element_type: continue if element_type == IliasElementType.MEETING: @@ -445,6 +451,26 @@ class IliasPage: return result + def _is_in_expanded_meeting(self, tag: Tag) -> bool: + """ + Returns whether a file is part of an expanded meeting. + Has false positives for meetings themselves as their title is also "in the expanded meeting content". + It is in the same general div and this whole thing is guesswork. + Therefore, you should check for meetings before passing them in this function. + """ + parents: List[Tag] = list(tag.parents) + for parent in parents: + if not parent.get("class"): + continue + + # We should not crawl files under meetings + if "ilContainerListItemContentCB" in parent.get("class"): + link: Tag = parent.parent.find("a") + type = IliasPage._find_type_from_folder_like(link, self._page_url) + return type == IliasElementType.MEETING + + return False + def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]: """ Interprets accordions and expandable blocks as virtual folders and returns them From 2f0e04ce13ebbc7c7ccaa93e03d8f707f246ceef Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:57:55 +0200 Subject: [PATCH 084/224] Adjust changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 959fda0..4249287 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ ambiguous situations. 
### Fixed - IPD crawler crashes on some sites +- Meeting name normalization for yesterday, today and tomorrow fails +- Crawling of meeting file previews ## 3.4.0 - 2022-05-01 From 616b0480f7c92afe11c36d2c105c99ba5f960e96 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 8 May 2022 17:39:18 +0200 Subject: [PATCH 085/224] Simplify IPD crawler link regex --- CHANGELOG.md | 5 +++-- CONFIG.md | 2 +- PFERD/crawl/kit_ipd_crawler.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4249287..e2d3840 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,11 +24,12 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler -- Mention hrefs in IPD crawler for users of `link_regex` option +- Mention hrefs in IPD crawler's `--explain` output for users of `link_regex` option +- Simplify default IPD crawler `link_regex` ### Fixed - IPD crawler crashes on some sites -- Meeting name normalization for yesterday, today and tomorrow fails +- Meeting name normalization for yesterday, today and tomorrow - Crawling of meeting file previews ## 3.4.0 - 2022-05-01 diff --git a/CONFIG.md b/CONFIG.md index 1355c34..f572a80 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -146,7 +146,7 @@ requests is likely a good idea. - `target`: URL to a KIT-IPD page - `link_regex`: A regex that is matched against the `href` part of links. If it matches, the given link is downloaded as a file. This is used to extract - files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$`) + files from KIT-IPD pages. 
(Default: `^.*?[^/]+\.(pdf|zip|c|cpp|java)$`) ### The `kit-ilias-web` crawler diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 78fe0b1..d9fac32 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -27,7 +27,7 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target def link_regex(self) -> Pattern[str]: - regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$") + regex = self.s.get("link_regex", r"^.*?[^/]+\.(pdf|zip|c|cpp|java)$") return re.compile(regex) From a5015fe9b16d484613a27687f2c122b15e109ba2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 8 May 2022 23:21:18 +0200 Subject: [PATCH 086/224] Correctly parse day-only meeting dates I failed to recognize the correct format in the previous adjustment, so this (hopefully) fixes it for good. Meetings apparently don't always have a time portion. --- PFERD/crawl/ilias/kit_ilias_html.py | 48 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d93684c..6d063b6 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -746,17 +746,26 @@ class IliasPage: Normalizes meeting names, which have a relative time as their first part, to their date in ISO format. """ - date_portion_str = meeting_name.split(" - ")[0] + + # This checks whether we can reach a `:` without passing a `-` + if re.search(r"^[^-]+: ", meeting_name): + # Meeting name only contains date: "05. Jan 2000:" + split_delimiter = ":" + else: + # Meeting name contains date and start/end times: "05. 
Jan 2000, 16:00 - 17:30:" + split_delimiter = ", " + + # We have a meeting day without time + date_portion_str = meeting_name.split(split_delimiter)[0] date_portion = demangle_date(date_portion_str) + # We failed to parse the date, bail out if not date_portion: return meeting_name - rest_of_name = meeting_name - if rest_of_name.startswith(date_portion_str): - rest_of_name = rest_of_name[len(date_portion_str):] - - return datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") + rest_of_name + # Replace the first section with the absolute date + rest_of_name = split_delimiter.join(meeting_name.split(split_delimiter)[1:]) + return datetime.strftime(date_portion, "%Y-%m-%d") + split_delimiter + rest_of_name def _abs_url_from_link(self, link_tag: Tag) -> str: """ @@ -781,17 +790,15 @@ english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[datetime]: """ - Demangle a given date in one of the following formats: + Demangle a given date in one of the following formats (hour/minute part is optional): "Gestern, HH:MM" "Heute, HH:MM" "Morgen, HH:MM" "dd. mon yyyy, HH:MM """ try: + # Normalize whitespace because users date_str = re.sub(r"\s+", " ", date_str) - date_str = re.sub("(Gestern|Yesterday):", "", date_str, re.I) - date_str = re.sub("(Heute|Today):", "", date_str, re.I) - date_str = re.sub("(Morgen|Tomorrow):", "", date_str, re.I) date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) @@ -802,19 +809,28 @@ def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[dateti # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" date_str = date_str.replace(english + ".", english) - # We now have a nice english String in the format: "dd. 
mmm yyyy, hh:mm" - day_part, time_part = date_str.split(",") + # We now have a nice english String in the format: "dd. mmm yyyy, hh:mm" or "dd. mmm yyyy" + + # Check if we have a time as well + if ", " in date_str: + day_part, time_part = date_str.split(",") + else: + day_part = date_str.split(",")[0] + time_part = None + day_str, month_str, year_str = day_part.split(" ") day = int(day_str.strip().replace(".", "")) month = english_months.index(month_str.strip()) + 1 year = int(year_str.strip()) - hour_str, minute_str = time_part.split(":") - hour = int(hour_str) - minute = int(minute_str) + if time_part: + hour_str, minute_str = time_part.split(":") + hour = int(hour_str) + minute = int(minute_str) + return datetime(year, month, day, hour, minute) - return datetime(year, month, day, hour, minute) + return datetime(year, month, day) except Exception: if not fail_silently: log.warn(f"Date parsing failed for {date_str!r}") From 846c29aee1867f7f0b7efae802af47fee77a3ec6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 11 May 2022 21:16:09 +0200 Subject: [PATCH 087/224] Download page descriptions --- CHANGELOG.md | 3 + PFERD/crawl/ilias/ilias_html_cleaner.py | 91 ++++++++++++++++++++++ PFERD/crawl/ilias/kit_ilias_html.py | 25 ++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 29 +++++++ 4 files changed, 148 insertions(+) create mode 100644 PFERD/crawl/ilias/ilias_html_cleaner.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e2d3840..b7cad13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Added +- Download of page descriptions + ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler - Mention hrefs in IPD crawler's `--explain` output for users of `link_regex` option diff --git a/PFERD/crawl/ilias/ilias_html_cleaner.py b/PFERD/crawl/ilias/ilias_html_cleaner.py new file mode 100644 index 0000000..5952309 --- /dev/null +++ b/PFERD/crawl/ilias/ilias_html_cleaner.py @@ -0,0 +1,91 @@ +from bs4 import BeautifulSoup, Comment, Tag + +_STYLE_TAG_CONTENT = """ + .ilc_text_block_Information { + background-color: #f5f7fa; + } + div.ilc_text_block_Standard { + margin-bottom: 10px; + margin-top: 10px; + } + span.ilc_text_inline_Strong { + font-weight: bold; + } + + .accordion-head { + background-color: #f5f7fa; + padding: 0.5rem 0; + } + + h3 { + margin-top: 0.5rem; + margin-bottom: 1rem; + } + + br.visible-break { + margin-bottom: 1rem; + } + + article { + margin: 0.5rem 0; + } + + body { + padding: 1em; + grid-template-columns: 1fr min(60rem, 90%) 1fr; + line-height: 1.2; + } +""" + +_ARTICLE_WORTHY_CLASSES = [ + "ilc_text_block_Information", + "ilc_section_Attention", + "ilc_section_Link", +] + + +def insert_base_markup(soup: BeautifulSoup) -> BeautifulSoup: + head = soup.new_tag("head") + soup.insert(0, head) + + simplecss_link: Tag = soup.new_tag("link") + # + simplecss_link["rel"] = "stylesheet" + simplecss_link["href"] = "https://cdn.simplecss.org/simple.css" + head.append(simplecss_link) + + # Basic style tags for compat + style: Tag = soup.new_tag("style") + style.append(_STYLE_TAG_CONTENT) + head.append(style) + + return soup + + +def clean(soup: BeautifulSoup) -> BeautifulSoup: + for block in soup.find_all(class_=lambda x: x in _ARTICLE_WORTHY_CLASSES): + block.name = "article" + + for block in soup.find_all("h3"): + block.name = "div" + + for block in soup.find_all("h1"): + block.name = "h3" + + for block in soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap"): + block.name = "h3" + block["class"] += 
["accordion-head"] + + for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"): + children = list(dummy.children) + if not children: + dummy.decompose() + if len(children) > 1: + continue + if type(children[0]) == Comment: + dummy.decompose() + + for hrule_imposter in soup.find_all(class_="ilc_section_Separator"): + hrule_imposter.insert(0, soup.new_tag("hr")) + + return soup diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 6d063b6..d58e5c8 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -85,6 +85,31 @@ class IliasPage: log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() + def get_description(self) -> Optional[BeautifulSoup]: + def is_interesting_class(name: str) -> bool: + return name in ["ilCOPageSection", "ilc_Paragraph", "ilc_va_ihcap_VAccordIHeadCap"] + + paragraphs: List[Tag] = self._soup.findAll(class_=is_interesting_class) + if not paragraphs: + return None + + # Extract bits and pieces into a string and parse it again. + # This ensures we don't miss anything and weird structures are resolved + # somewhat gracefully. 
+ raw_html = "" + for p in paragraphs: + if p.find_parent(class_=is_interesting_class): + continue + + # Ignore special listings (like folder groupings) + if "ilc_section_Special" in p["class"]: + continue + + raw_html += str(p) + "\n" + raw_html = f"\n{raw_html}\n" + + return BeautifulSoup(raw_html, "html.parser") + def get_next_stage_element(self) -> Optional[IliasPageElement]: if self._is_ilias_opencast_embedding(): return self.get_child_elements()[0] diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index ae9ebd4..bbed986 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -17,6 +17,7 @@ from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links +from .ilias_html_cleaner import clean, insert_base_markup from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement TargetType = Union[str, int] @@ -215,6 +216,8 @@ instance's greatest bottleneck. cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 elements: List[IliasPageElement] = [] + # A list as variable redefinitions are not propagated to outer scopes + description: List[BeautifulSoup] = [] @_iorepeat(3, "crawling url") async def gather_elements() -> None: @@ -233,9 +236,15 @@ instance's greatest bottleneck. page = IliasPage(soup, url, None) elements.extend(page.get_child_elements()) + if description_string := page.get_description(): + description.append(description_string) + # Fill up our task list with the found elements await gather_elements() + if description: + await self._download_description(PurePath("."), description[0]) + elements.sort(key=lambda e: e.id()) tasks: List[Awaitable[None]] = [] @@ -265,6 +274,8 @@ instance's greatest bottleneck. 
cl: CrawlToken, ) -> None: elements: List[IliasPageElement] = [] + # A list as variable redefinitions are not propagated to outer scopes + description: List[BeautifulSoup] = [] @_iorepeat(3, "crawling folder") async def gather_elements() -> None: @@ -285,10 +296,15 @@ instance's greatest bottleneck. next_stage_url = None elements.extend(page.get_child_elements()) + if description_string := page.get_description(): + description.append(description_string) # Fill up our task list with the found elements await gather_elements() + if description: + await self._download_description(PurePath("."), description[0]) + elements.sort(key=lambda e: e.id()) tasks: List[Awaitable[None]] = [] @@ -425,6 +441,19 @@ instance's greatest bottleneck. return self._download_booking(element, link_template_maybe, maybe_dl) + @anoncritical + @_iorepeat(1, "downloading description") + async def _download_description(self, parent_path: PurePath, description: BeautifulSoup) -> None: + path = parent_path / "Description.html" + dl = await self.download(path, redownload=Redownload.ALWAYS) + if not dl: + return + + async with dl as (bar, sink): + description = clean(insert_base_markup(description)) + sink.file.write(description.prettify().encode("utf-8")) + sink.done() + @anoncritical @_iorepeat(3, "resolving booking") async def _download_booking( From 46fb782798725b6fde76b71cf7a4d90912ea2c7d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 24 May 2022 23:28:09 +0200 Subject: [PATCH 088/224] Add forum crawling This downloads all forum posts when needed and saves each thread in its own html file, named after the thread title. 
--- CHANGELOG.md | 1 + PFERD/cli/command_kit_ilias_web.py | 7 ++ PFERD/crawl/ilias/kit_ilias_html.py | 90 ++++++++++++++- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 122 ++++++++++++++++++--- PFERD/logging.py | 4 +- 5 files changed, 208 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7cad13..1d70c4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Added - Download of page descriptions +- Forum download support ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index 12803a6..de74fc3 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -62,6 +62,11 @@ GROUP.add_argument( action=BooleanOptionalAction, help="crawl and download videos" ) +GROUP.add_argument( + "--forums", + action=BooleanOptionalAction, + help="crawl and download forum posts" +) GROUP.add_argument( "--http-timeout", "-t", type=float, @@ -90,6 +95,8 @@ def load( section["link_redirect_delay"] = str(args.link_redirect_delay) if args.videos is not None: section["videos"] = "yes" if args.videos else "no" + if args.forums is not None: + section["forums"] = "yes" if args.forums else "no" if args.http_timeout is not None: section["http_timeout"] = str(args.http_timeout) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d58e5c8..7bab152 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from datetime import date, datetime, timedelta from enum import Enum -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup, Tag @@ -55,6 +55,20 @@ class IliasPageElement: return self.url +@dataclass +class IliasDownloadForumData: + url: str + form_data: Dict[str, 
Union[str, List[str]]] + + +@dataclass +class IliasForumThread: + title: str + title_tag: Tag + content_tag: Tag + mtime: Optional[datetime] + + class IliasPage: def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): @@ -110,13 +124,39 @@ class IliasPage: return BeautifulSoup(raw_html, "html.parser") + def get_download_forum_data(self) -> Optional[IliasDownloadForumData]: + form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x}) + if not form: + return None + post_url = self._abs_url_from_relative(form["action"]) + + form_data: Dict[str, Union[str, List[str]]] = { + "thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})], + "selected_cmd2": "html", + "select_cmd2": "Ausführen", + "selected_cmd": "", + } + + return IliasDownloadForumData(post_url, form_data) + def get_next_stage_element(self) -> Optional[IliasPageElement]: + if self._is_forum_page(): + if "trows=800" in self._page_url: + return None + return self._get_show_max_forum_entries_per_page_url() if self._is_ilias_opencast_embedding(): return self.get_child_elements()[0] if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: return self._find_video_entries_paginated()[0] return None + def _is_forum_page(self) -> bool: + read_more_btn = self._soup.find( + "button", + attrs={"onclick": lambda x: x and "cmdClass=ilobjforumgui&cmd=markAllRead" in x} + ) + return read_more_btn is not None + def _is_video_player(self) -> bool: return "paella_config_file" in str(self._soup) @@ -194,6 +234,19 @@ class IliasPage: return items + def _get_show_max_forum_entries_per_page_url(self) -> Optional[IliasPageElement]: + correct_link = self._soup.find( + "a", + attrs={"href": lambda x: x and "trows=800" in x and "cmd=showThreads" in x} + ) + + if not correct_link: + return None + + link = self._abs_url_from_link(correct_link) + + return IliasPageElement(IliasElementType.FORUM, link, "show all forum 
threads") + def _find_personal_desktop_entries(self) -> List[IliasPageElement]: items: List[IliasPageElement] = [] @@ -877,3 +930,38 @@ def _tomorrow() -> date: def _sanitize_path_name(name: str) -> str: return name.replace("/", "-").replace("\\", "-").strip() + + +def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThread]: + elements = [] + for p in forum_export.select("body > p"): + title_tag = p + content_tag = p.find_next_sibling("ul") + title = p.find("b").text + if ":" in title: + title = title[title.find(":") + 1:] + title = title.strip() + mtime = _guess_timestamp_from_forum_post_content(content_tag) + elements.append(IliasForumThread(title, title_tag, content_tag, mtime)) + + return elements + + +def _guess_timestamp_from_forum_post_content(content: Tag) -> Optional[datetime]: + posts: Optional[Tag] = content.select(".ilFrmPostHeader > span.small") + if not posts: + return None + + newest_date: Optional[datetime] = None + + for post in posts: + text = post.text.strip() + text = text[text.rfind("|") + 1:] + date = demangle_date(text, fail_silently=True) + if not date: + continue + + if not newest_date or newest_date < date: + newest_date = date + + return newest_date diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index bbed986..156cd4c 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -18,7 +18,8 @@ from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadTo from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links from .ilias_html_cleaner import clean, insert_base_markup -from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement +from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement, + _sanitize_path_name, parse_ilias_forum_export) TargetType = Union[str, int] @@ -67,6 +68,9 @@ class 
KitIliasWebCrawlerSection(HttpCrawlerSection): def videos(self) -> bool: return self.s.getboolean("videos", fallback=False) + def forums(self) -> bool: + return self.s.getboolean("forums", fallback=False) + _DIRECTORY_PAGES: Set[IliasElementType] = set([ IliasElementType.EXERCISE, @@ -183,6 +187,7 @@ instance's greatest bottleneck. self._link_file_redirect_delay = section.link_redirect_delay() self._links = section.links() self._videos = section.videos() + self._forums = section.forums() self._visited_urls: Set[str] = set() async def _run(self) -> None: @@ -335,22 +340,27 @@ instance's greatest bottleneck. element_path = PurePath(parent_path, element.name) if element.type in _VIDEO_ELEMENTS: - log.explain_topic(f"Decision: Crawl video element {fmt_path(element_path)}") if not self._videos: - log.explain("Video crawling is disabled") - log.explain("Answer: no") + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](enable with option 'videos')" + ) return None - else: - log.explain("Video crawling is enabled") - log.explain("Answer: yes") if element.type == IliasElementType.FILE: return await self._handle_file(element, element_path) elif element.type == IliasElementType.FORUM: - log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") - log.explain("Forums are not supported") - log.explain("Answer: No") - return None + if not self._forums: + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](enable with option 'forums')" + ) + return None + return await self._handle_forum(element, element_path) elif element.type == IliasElementType.TEST: log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Tests contain no relevant files") @@ -635,6 +645,68 @@ instance's greatest bottleneck. 
if not await try_stream(): raise CrawlError("File streaming failed after authenticate()") + async def _handle_forum( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Coroutine[Any, Any, None]]: + maybe_cl = await self.crawl(element_path) + if not maybe_cl: + return None + return self._crawl_forum(element, maybe_cl) + + @_iorepeat(3, "crawling forum") + @anoncritical + async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None: + elements = [] + + async with cl: + next_stage_url = element.url + while next_stage_url: + log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {next_stage_url}") + + soup = await self._get_page(next_stage_url) + page = IliasPage(soup, next_stage_url, None) + + if next := page.get_next_stage_element(): + next_stage_url = next.url + else: + break + + download_data = page.get_download_forum_data() + if not download_data: + raise CrawlWarning("Failed to extract forum data") + html = await self._post_authenticated(download_data.url, download_data.form_data) + elements = parse_ilias_forum_export(soupify(html)) + + elements.sort(key=lambda elem: elem.title) + + tasks: List[Awaitable[None]] = [] + for elem in elements: + tasks.append(asyncio.create_task(self._download_forum_thread(cl.path, elem))) + + # And execute them + await self.gather(tasks) + + @anoncritical + @_iorepeat(3, "saving forum thread") + async def _download_forum_thread( + self, + parent_path: PurePath, + element: IliasForumThread, + ) -> None: + path = parent_path / (_sanitize_path_name(element.title) + ".html") + maybe_dl = await self.download(path, mtime=element.mtime) + if not maybe_dl: + return + + async with maybe_dl as (bar, sink): + content = element.title_tag.prettify() + content += element.content_tag.prettify() + sink.file.write(content.encode("utf-8")) + sink.done() + async def _get_page(self, url: str) -> BeautifulSoup: auth_id = await self._current_auth_id() async with 
self.session.get(url) as request: @@ -652,13 +724,37 @@ instance's greatest bottleneck. return soup raise CrawlError("get_page failed even after authenticating") + async def _post_authenticated( + self, + url: str, + data: dict[str, Union[str, List[str]]] + ) -> BeautifulSoup: + auth_id = await self._current_auth_id() + + form_data = aiohttp.FormData() + for key, val in data.items(): + form_data.add_field(key, val) + + async with self.session.post(url, data=form_data, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + + # We weren't authenticated, so try to do that + await self.authenticate(auth_id) + + # Retry once after authenticating. If this fails, we will die. + async with self.session.post(url, data=data, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + raise CrawlError("post_authenticated failed even after authenticating") + # We repeat this as the login method in shibboleth doesn't handle I/O errors. # Shibboleth is quite reliable as well, the repeat is likely not critical here. @_iorepeat(3, "Login", failure_is_error=True) async def _authenticate(self) -> None: await self._shibboleth_login.login(self.session) @staticmethod def _is_logged_in(soup: BeautifulSoup) -> bool: # Normal ILIAS pages mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") if mainbar is not None: login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) shib_login = soup.find(id="button_shib_login") return not login_button and not shib_login diff --git a/PFERD/logging.py b/PFERD/logging.py index e833716..340b21f 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -197,7 +197,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_explain: self.print(f" {escape(text)}") - def status(self, style: str, action: str, text: str) -> None: + def status(self, style: str, action: str, text: str, suffix: str = "") -> None: """ Print a status update while crawling. Allows markup in the "style" argument which will be applied to the "action" string. 
@@ -205,7 +205,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_status: action = escape(f"{action:<{self.STATUS_WIDTH}}") - self.print(f"{style}{action}[/] {escape(text)}") + self.print(f"{style}{action}[/] {escape(text)} {suffix}") def report(self, text: str) -> None: """ From ed24366aba7cfb8ca3cdd0df7b2650bc1220437f Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 Jan 2022 16:23:37 +0100 Subject: [PATCH 089/224] Add pass authenticator --- CHANGELOG.md | 1 + CONFIG.md | 21 ++++++++- PFERD/auth/__init__.py | 3 ++ PFERD/auth/pass_.py | 98 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 PFERD/auth/pass_.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d70c4a..bc9f3e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Added - Download of page descriptions - Forum download support +- `pass` authenticator ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler diff --git a/CONFIG.md b/CONFIG.md index f572a80..0f114ed 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -223,6 +223,23 @@ is stored in the keyring. - `keyring_name`: The service name PFERD uses for storing credentials. (Default: `PFERD`) +### The `pass` authenticator + +This authenticator queries the [`pass` password manager][3] for a username and +password. It tries to be mostly compatible with [browserpass][4] and +[passff][5], so see those links for an overview of the format. If PFERD fails +to load your password, you can use the `--explain` flag to see why. 
+ +- `passname`: The name of the password to use (Required) +- `username_prefixes`: A comma-separated list of username line prefixes + (Default: `login,username,user`) +- `password_prefixes`: A comma-separated list of password line prefixes + (Default: `password,pass,secret`) + +[3]: <https://www.passwordstore.org/> "Pass: The Standard Unix Password Manager" +[4]: <https://github.com/browserpass/browserpass-extension#organizing-password-store> "Organizing password store" +[5]: <https://github.com/passff/passff#multi-line-format> "Multi-line format" + ### The `tfa` authenticator This authenticator prompts the user on the console for a two-factor @@ -316,7 +333,7 @@ is a regular expression and `TARGET` an f-string based template. If a path matches `SOURCE`, the output path is created using `TARGET` as template. `SOURCE` is automatically anchored. -`TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can +`TARGET` uses Python's [format string syntax][6]. The *n*-th capturing group can be referred to as `{g}` (e.g. `{g3}`). `{g0}` refers to the original path. If capturing group *n*'s contents are a valid integer, the integer value is available as `{i}` (e.g. `{i3}`). 
If capturing group *n*'s contents are a @@ -337,7 +354,7 @@ Example: `f(oo+)/be?ar -re-> B{g1.upper()}H/fear` - Converts `fooooo/bear` into `BOOOOOH/fear` - Converts `foo/bar/baz` into `BOOH/fear/baz` -[3]: <https://docs.python.org/3/library/string.html#format-string-syntax> "Format String Syntax" +[6]: <https://docs.python.org/3/library/string.html#format-string-syntax> "Format String Syntax" ### The `-name-re->` arrow diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 277cade..aa3ba8e 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -5,6 +5,7 @@ from ..config import Config from .authenticator import Authenticator, AuthError, AuthLoadError, AuthSection # noqa: F401 from .credential_file import CredentialFileAuthenticator, CredentialFileAuthSection from .keyring import KeyringAuthenticator, KeyringAuthSection +from .pass_ import PassAuthenticator, PassAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator @@ -19,6 +20,8 @@ AUTHENTICATORS: Dict[str, AuthConstructor] = { CredentialFileAuthenticator(n, CredentialFileAuthSection(s), c), "keyring": lambda n, s, c: KeyringAuthenticator(n, KeyringAuthSection(s)), + "pass": lambda n, s, c: + PassAuthenticator(n, PassAuthSection(s)), "simple": lambda n, s, c: SimpleAuthenticator(n, SimpleAuthSection(s)), "tfa": lambda n, s, c: diff --git a/PFERD/auth/pass_.py b/PFERD/auth/pass_.py new file mode 100644 index 0000000..4c8e775 --- /dev/null +++ b/PFERD/auth/pass_.py @@ -0,0 +1,98 @@ +import re +import subprocess +from typing import List, Tuple + +from ..logging import log +from .authenticator import Authenticator, AuthError, AuthSection + + +class PassAuthSection(AuthSection): + def passname(self) -> str: + if (value := self.s.get("passname")) is None: + self.missing_value("passname") + return value + + def username_prefixes(self) -> List[str]: + value = self.s.get("username_prefixes", "login,username,user") + return [prefix.lower() for prefix in value.split(",")] + + def password_prefixes(self) -> List[str]: + value = self.s.get("password_prefixes", "password,pass,secret") + 
return [prefix.lower() for prefix in value.split(",")] + + +class PassAuthenticator(Authenticator): + PREFIXED_LINE_RE = r"([a-zA-Z]+):\s?(.*)" # to be used with fullmatch + + def __init__(self, name: str, section: PassAuthSection) -> None: + super().__init__(name) + + self._passname = section.passname() + self._username_prefixes = section.username_prefixes() + self._password_prefixes = section.password_prefixes() + + async def credentials(self) -> Tuple[str, str]: + log.explain_topic("Obtaining credentials from pass") + + try: + log.explain(f"Calling 'pass show {self._passname}'") + result = subprocess.check_output(["pass", "show", self._passname], text=True) + except subprocess.CalledProcessError as e: + raise AuthError(f"Failed to get password info from {self._passname}: {e}") + + prefixed = {} + unprefixed = [] + for line in result.strip().splitlines(): + if match := re.fullmatch(self.PREFIXED_LINE_RE, line): + prefix = match.group(1).lower() + value = match.group(2) + log.explain(f"Found prefixed line {line!r} with prefix {prefix!r}, value {value!r}") + if prefix in prefixed: + raise AuthError(f"Prefix {prefix} specified multiple times") + prefixed[prefix] = value + else: + log.explain(f"Found unprefixed line {line!r}") + unprefixed.append(line) + + username = None + for prefix in self._username_prefixes: + log.explain(f"Looking for username at prefix {prefix!r}") + if prefix in prefixed: + username = prefixed[prefix] + log.explain(f"Found username {username!r}") + break + + password = None + for prefix in self._password_prefixes: + log.explain(f"Looking for password at prefix {prefix!r}") + if prefix in prefixed: + password = prefixed[prefix] + log.explain(f"Found password {password!r}") + break + + if password is None and username is None: + log.explain("No username and password found so far") + log.explain("Using first unprefixed line as password") + log.explain("Using second unprefixed line as username") + elif password is None: + log.explain("No password 
found so far") + log.explain("Using first unprefixed line as password") + elif username is None: + log.explain("No username found so far") + log.explain("Using first unprefixed line as username") + + if password is None: + if not unprefixed: + log.explain("Not enough unprefixed lines left") + raise AuthError("Password could not be determined") + password = unprefixed.pop(0) + log.explain(f"Found password {password!r}") + + if username is None: + if not unprefixed: + log.explain("Not enough unprefixed lines left") + raise AuthError("Username could not be determined") + username = unprefixed.pop(0) + log.explain(f"Found username {username!r}") + + return username, password From 345f52a1f6f55eecf6c31d3cc1a4350c5200087d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:41:29 +0200 Subject: [PATCH 090/224] Detect new login button --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 156cd4c..c99a920 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -759,7 +759,7 @@ instance's greatest bottleneck. 
# Normal ILIAS pages mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") if mainbar is not None: - login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) + login_button = mainbar.find(attrs={"href": lambda x: x and "login.php" in x}) shib_login = soup.find(id="button_shib_login") return not login_button and not shib_login From d9b111cec252f4b1810f06b0f2ca551cb5cdb2a2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:45:33 +0200 Subject: [PATCH 091/224] Correctly nest description entries --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c99a920..1852c5f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -308,7 +308,7 @@ instance's greatest bottleneck. await gather_elements() if description: - await self._download_description(PurePath("."), description[0]) + await self._download_description(cl.path, description[0]) elements.sort(key=lambda e: e.id()) From aa5a3a10bcbfa0dd54a0dc1a533625f76b2d6ed8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:48:59 +0200 Subject: [PATCH 092/224] Adjust changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc9f3e5..7f35c9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,9 @@ ambiguous situations. 
- IPD crawler crashes on some sites - Meeting name normalization for yesterday, today and tomorrow - Crawling of meeting file previews +- Login with new login button html layout +- Descriptions for courses are now placed in the correct subfolder when + downloading the whole desktop ## 3.4.0 - 2022-05-01 From 66a5b1ba0223848f713192b084f2dcd26a18dbe5 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 17 Aug 2022 13:24:01 +0200 Subject: [PATCH 093/224] Bump version to 3.4.1 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f35c9c..671d48a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.4.1 - 2022-08-17 + ### Added - Download of page descriptions - Forum download support diff --git a/PFERD/version.py b/PFERD/version.py index 8102d37..8832a51 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.0" +VERSION = "3.4.1" From 4a51aaa4f5a1b3382f0bed59f1292fc0952c2832 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 Oct 2022 22:59:33 +0200 Subject: [PATCH 094/224] Fix forum crawling crashing for empty threads --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 671d48a..70d2cd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Fixed +- Forum crawling crashing when parsing empty (= 0 messages) threads + ## 3.4.1 - 2022-08-17 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 7bab152..8795512 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -937,6 +937,13 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThre for p in forum_export.select("body > p"): title_tag = p content_tag = p.find_next_sibling("ul") + + if not content_tag: + # ILIAS allows users to delete the initial post while keeping the thread open + # This produces empty threads without *any* content. + # I am not sure why you would want this, but ILIAS makes it easy to do. + continue + title = p.find("b").text if ":" in title: title = title[title.find(":") + 1:] From d72fc2760b1dd8243ccf21876bb8cc6e027944bb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 13:09:29 +0200 Subject: [PATCH 095/224] Handle empty forums --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 7 +++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 6 +++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70d2cd5..c7a9899 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. 
### Fixed - Forum crawling crashing when parsing empty (= 0 messages) threads +- Forum crawling crashing when a forum has no threads at all ## 3.4.1 - 2022-08-17 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 8795512..9ea6b9f 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -59,6 +59,7 @@ class IliasPageElement: class IliasDownloadForumData: url: str form_data: Dict[str, Union[str, List[str]]] + empty: bool @dataclass @@ -130,14 +131,16 @@ class IliasPage: return None post_url = self._abs_url_from_relative(form["action"]) + thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})] + form_data: Dict[str, Union[str, List[str]]] = { - "thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})], + "thread_ids[]": thread_ids, "selected_cmd2": "html", "select_cmd2": "Ausführen", "selected_cmd": "", } - return IliasDownloadForumData(post_url, form_data) + return IliasDownloadForumData(url=post_url, form_data=form_data, empty=len(thread_ids) == 0) def get_next_stage_element(self) -> Optional[IliasPageElement]: if self._is_forum_page(): diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 1852c5f..f2d5215 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -658,7 +658,7 @@ instance's greatest bottleneck. @_iorepeat(3, "crawling forum") @anoncritical async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None: - elements = [] + elements: List[IliasForumThread] = [] async with cl: next_stage_url = element.url @@ -677,6 +677,10 @@ instance's greatest bottleneck. 
download_data = page.get_download_forum_data() if not download_data: raise CrawlWarning("Failed to extract forum data") + if download_data.empty: + log.explain("Forum had no threads") + elements = [] + return html = await self._post_authenticated(download_data.url, download_data.form_data) elements = parse_ilias_forum_export(soupify(html)) From fb4631ba180a9ff0303d59e798d4bccfa0253666 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 13:13:36 +0200 Subject: [PATCH 096/224] Fix ilias background login --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 35 ++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index f2d5215..10a270f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -23,6 +23,12 @@ from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, Ilia TargetType = Union[str, int] +_ILIAS_URL = "https://ilias.studium.kit.edu" + + +class KitShibbolethBackgroundLoginSuccessful(): + pass + class KitIliasWebCrawlerSection(HttpCrawlerSection): def target(self) -> TargetType: @@ -36,7 +42,7 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection): if target == "desktop": # Full personal desktop return target - if target.startswith("https://ilias.studium.kit.edu"): + if target.startswith(_ILIAS_URL): # ILIAS URL return target @@ -181,7 +187,7 @@ instance's greatest bottleneck. 
section.tfa_auth(authenticators), ) - self._base_url = "https://ilias.studium.kit.edu" + self._base_url = _ILIAS_URL self._target = section.target() self._link_file_redirect_delay = section.link_redirect_delay() @@ -808,14 +814,17 @@ class KitShibbolethLogin: # Equivalent: Click on "Mit KIT-Account anmelden" button in # https://ilias.studium.kit.edu/login.php - url = "https://ilias.studium.kit.edu/shib_login.php" + url = f"{_ILIAS_URL}/shib_login.php" data = { "sendLogin": "1", "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", "il_target": "", "home_organization_selection": "Weiter", } - soup: BeautifulSoup = await _shib_post(sess, url, data) + soup: Union[BeautifulSoup, KitShibbolethBackgroundLoginSuccessful] = await _shib_post(sess, url, data) + + if isinstance(soup, KitShibbolethBackgroundLoginSuccessful): + return # Attempt to login using credentials, if necessary while not self._login_successful(soup): @@ -854,7 +863,7 @@ class KitShibbolethLogin: # (or clicking "Continue" if you have JS disabled) relay_state = soup.find("input", {"name": "RelayState"}) saml_response = soup.find("input", {"name": "SAMLResponse"}) - url = "https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST" + url = f"{_ILIAS_URL}/Shibboleth.sso/SAML2/POST" data = { # using the info obtained in the while loop above "RelayState": relay_state["value"], "SAMLResponse": saml_response["value"], @@ -903,22 +912,35 @@ async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> Beautifu return soupify(await response.read()) -async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: +async def _shib_post( + session: aiohttp.ClientSession, + url: str, + data: Any +) -> Union[BeautifulSoup, KitShibbolethBackgroundLoginSuccessful]: """ aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected by Shibboleth. Thanks a lot. 
So now we unroll the requests manually, parse location headers and build encoded URL objects ourselves... Who thought mangling location header was a good idea?? """ + log.explain_topic("Shib login POST") async with session.post(url, data=data, allow_redirects=False) as response: location = response.headers.get("location") + log.explain(f"Got location {location!r}") if not location: raise CrawlWarning(f"Login failed (1), no location header present at {url}") correct_url = yarl.URL(location, encoded=True) + log.explain(f"Corrected location to {correct_url!r}") + + if str(correct_url).startswith(_ILIAS_URL): + log.explain("ILIAS recognized our shib token and logged us in in the background, returning") + return KitShibbolethBackgroundLoginSuccessful() async with session.get(correct_url, allow_redirects=False) as response: location = response.headers.get("location") + log.explain(f"Redirected to {location!r} with status {response.status}") # If shib still still has a valid session, it will directly respond to the request if location is None: + log.explain("Shib recognized us, returning its response directly") return soupify(await response.read()) as_yarl = yarl.URL(response.url) @@ -932,6 +954,7 @@ async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> Bea path=location, encoded=True ) + log.explain(f"Corrected location to {correct_url!r}") async with session.get(correct_url, allow_redirects=False) as response: return soupify(await response.read()) From 5fdd40204b156b15c008ec1dee05e168672fe243 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 14:33:58 +0200 Subject: [PATCH 097/224] Unwrap future meetings when ILIAS hides them behind a pagination --- PFERD/crawl/ilias/kit_ilias_html.py | 20 +++++++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 29 ++++++++++++++-------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 
9ea6b9f..2f0011e 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -146,11 +146,17 @@ class IliasPage: if self._is_forum_page(): if "trows=800" in self._page_url: return None + log.explain("Requesting *all* forum threads") return self._get_show_max_forum_entries_per_page_url() if self._is_ilias_opencast_embedding(): + log.explain("Unwrapping opencast embedding") return self.get_child_elements()[0] if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: + log.explain("Unwrapping video pagination") return self._find_video_entries_paginated()[0] + if self._contains_collapsed_future_meetings(): + log.explain("Requesting *all* future meetings") + return self._uncollapse_future_meetings_url() return None def _is_forum_page(self) -> bool: @@ -203,6 +209,16 @@ class IliasPage: return False return "target=copa_" in link.get("value") + def _contains_collapsed_future_meetings(self) -> bool: + return self._uncollapse_future_meetings_url() is not None + + def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]: + element = self._soup.find("a", attrs={"href": lambda x: x and "crs_next_sess=1" in x}) + if not element: + return None + link = self._abs_url_from_link(element) + return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings") + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. 
The actual video stream url is nowhere @@ -793,6 +809,10 @@ class IliasPage: if img_tag is None: img_tag = found_parent.select_one("img.icon") + if img_tag is None and found_parent.find("a", attrs={"href": lambda x: x and "crs_next_sess=" in x}): + log.explain("Found session expansion button, skipping it as it has no content") + return None + if img_tag is None: _unexpected_html_warning() log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 10a270f..bc0d816 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -234,19 +234,28 @@ instance's greatest bottleneck. async def gather_elements() -> None: elements.clear() async with cl: - soup = await self._get_page(url) - - if expected_id is not None: - perma_link_element: Tag = soup.find(id="current_perma_link") - if not perma_link_element or "crs_" not in perma_link_element.get("value"): - raise CrawlError("Invalid course id? Didn't find anything looking like a course") + next_stage_url: Optional[str] = url + current_parent = None # Duplicated code, but the root page is special - we want to avoid fetching it twice! - log.explain_topic("Parsing root HTML page") - log.explain(f"URL: {url}") - page = IliasPage(soup, url, None) - elements.extend(page.get_child_elements()) + while next_stage_url: + soup = await self._get_page(next_stage_url) + if current_parent is None and expected_id is not None: + perma_link_element: Tag = soup.find(id="current_perma_link") + if not perma_link_element or "crs_" not in perma_link_element.get("value"): + raise CrawlError("Invalid course id? 
Didn't find anything looking like a course") + + log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {next_stage_url}") + page = IliasPage(soup, next_stage_url, current_parent) + if next_element := page.get_next_stage_element(): + current_parent = next_element + next_stage_url = next_element.url + else: + next_stage_url = None + + elements.extend(page.get_child_elements()) if description_string := page.get_description(): description.append(description_string) From e1430e629844ad122a78d18197ed54100c734bbb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 18:36:34 +0200 Subject: [PATCH 098/224] Handle (and ignore) surveys --- PFERD/crawl/ilias/kit_ilias_html.py | 3 +++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 2f0011e..d969577 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -24,6 +24,7 @@ class IliasElementType(Enum): LINK = "link" BOOKING = "booking" MEETING = "meeting" + SURVEY = "survey" VIDEO = "video" VIDEO_PLAYER = "video_player" VIDEO_FOLDER = "video_folder" @@ -730,6 +731,8 @@ class IliasPage: return IliasElementType.TEST if "fold" in icon["class"]: return IliasElementType.FOLDER + if "svy" in icon["class"]: + return IliasElementType.SURVEY _unexpected_html_warning() log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index bc0d816..5ff8212 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -380,6 +380,13 @@ instance's greatest bottleneck. 
log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Tests contain no relevant files") log.explain("Answer: No") + elif element.type == IliasElementType.SURVEY: + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](surveys contain no relevant data)" + ) return None elif element.type == IliasElementType.LINK: return await self._handle_link(element, element_path) From 1b6be6bd79112faea6e56c43f4756dde10ba00ba Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 18:36:54 +0200 Subject: [PATCH 099/224] Handle content pages in cards --- PFERD/crawl/ilias/kit_ilias_html.py | 2 ++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d969577..ee0364a 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -731,6 +731,8 @@ class IliasPage: return IliasElementType.TEST if "fold" in icon["class"]: return IliasElementType.FOLDER + if "copa" in icon["class"]: + return IliasElementType.FOLDER if "svy" in icon["class"]: return IliasElementType.SURVEY diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 5ff8212..9295e93 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -377,9 +377,13 @@ instance's greatest bottleneck. 
return None return await self._handle_forum(element, element_path) elif element.type == IliasElementType.TEST: - log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") - log.explain("Tests contain no relevant files") - log.explain("Answer: No") + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](tests contain no relevant data)" + ) + return None elif element.type == IliasElementType.SURVEY: log.status( "[bold bright_black]", From f47d2f11d843bfd3307815b231dd3e3df0265cef Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 Oct 2022 20:28:06 +0200 Subject: [PATCH 100/224] Append trailing slash to kit-ipd links to ensure urljoin works as expected --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7a9899..24d9fa6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Fixed - Forum crawling crashing when parsing empty (= 0 messages) threads - Forum crawling crashing when a forum has no threads at all +- kit-ipd crawler if URL did not end with a trailing slash ## 3.4.1 - 2022-08-17 diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index d9fac32..338e059 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -24,6 +24,9 @@ class KitIpdCrawlerSection(HttpCrawlerSection): if not target.startswith("https://"): self.invalid_value("target", target, "Should be a URL") + if not target.endswith("/"): + target = target + "/" + return target def link_regex(self) -> Pattern[str]: From 37b51a66d87d368afc3bef2b81edf1629f95cd57 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 Oct 2022 18:22:37 +0200 Subject: [PATCH 101/224] Update changelog --- CHANGELOG.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24d9fa6..2bb0231 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,10 +22,16 @@ 
ambiguous situations. ## Unreleased +### Added +- Recognize and crawl content pages in cards +- Recognize and ignore surveys + ### Fixed -- Forum crawling crashing when parsing empty (= 0 messages) threads +- Forum crawling crashing when a thread has no messages at all - Forum crawling crashing when a forum has no threads at all -- kit-ipd crawler if URL did not end with a trailing slash +- Ilias login failing in some cases +- Crawling of paginated future meetings +- IPD crawler handling of URLs without trailing slash ## 3.4.1 - 2022-08-17 From 259cfc20cccae68a2f34984796405a35a7f31707 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 Oct 2022 18:26:17 +0200 Subject: [PATCH 102/224] Bump version to 3.4.2 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bb0231..9ecddf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.4.2 - 2022-10-26 + ### Added - Recognize and crawl content pages in cards - Recognize and ignore surveys diff --git a/PFERD/version.py b/PFERD/version.py index 8832a51..0ef5d89 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.1" +VERSION = "3.4.2" From c020cccc64f152882688b119416f0582ec94e074 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Oct 2022 14:08:29 +0200 Subject: [PATCH 103/224] Include found paths in "second path found" warning --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 2 +- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 8 +++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ecddf7..3dd25b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Changed +- Clear up error message shown when multiple paths are found to an element + ## 3.4.2 - 2022-10-26 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index ee0364a..56dcf7b 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -134,7 +134,7 @@ class IliasPage: thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})] - form_data: Dict[str, Union[str, List[ſtr]]] = { + form_data: Dict[str, Union[str, List[str]]] = { "thread_ids[]": thread_ids, "selected_cmd2": "html", "select_cmd2": "Ausführen", diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 9295e93..e3719b8 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -194,7 +194,7 @@ instance's greatest bottleneck. self._links = section.links() self._videos = section.videos() self._forums = section.forums() - self._visited_urls: Set[str] = set() + self._visited_urls: Dict[str, PurePath] = dict() async def _run(self) -> None: if isinstance(self._target, int): @@ -348,9 +348,11 @@ instance's greatest bottleneck. ) -> Optional[Coroutine[Any, Any, None]]: if element.url in self._visited_urls: raise CrawlWarning( - f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath" + f"Found second path to element {element.name!r} at {element.url!r}. " + + f"First path: {fmt_path(self._visited_urls[element.url])}. " + + f"Second path: {fmt_path(parent_path)}." 
) - self._visited_urls.add(element.url) + self._visited_urls[element.url] = parent_path element_path = PurePath(parent_path, element.name) From 07200bbde5fb72f2f846101b92b440724c8c7959 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 Oct 2022 14:10:45 +0100 Subject: [PATCH 104/224] Document ilias web crawler's forums option --- CHANGELOG.md | 3 +++ CONFIG.md | 1 + 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dd25b8..e5e81d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Added +- Missing documentation for `forums` option + ### Changed - Clear up error message shown when multiple paths are found to an element diff --git a/CONFIG.md b/CONFIG.md index 0f114ed..1ca43c4 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -181,6 +181,7 @@ script once per day should be fine. redirect to the actual URL. Set to a negative value to disable the automatic redirect. (Default: `-1`) - `videos`: Whether to download videos. (Default: `no`) +- `forums`: Whether to download forum threads. (Default: `no`) - `http_timeout`: The timeout (in seconds) for all HTTP requests. (Default: `20.0`) From e69b55b3496d58bc19d76429ca0078ab10f23074 Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Fri, 4 Nov 2022 12:18:26 +0100 Subject: [PATCH 105/224] Add more unofficial package managers (#66) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index ce917b0..31a3475 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,10 @@ The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. 
Unofficial packages are available for: - [AUR](https://aur.archlinux.org/packages/pferd) +- [brew](https://formulae.brew.sh/formula/pferd) +- [conda-forge](https://github.com/conda-forge/pferd-feedstock) - [nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/pferd/default.nix) +- [PyPi](https://pypi.org/project/pferd) See also PFERD's [repology page](https://repology.org/project/pferd/versions). From 635caa765decd9a747d8b313252fd6b56cea0951 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 15 Nov 2022 17:17:55 +0100 Subject: [PATCH 106/224] Fix typo Thanks, burg113 --- CONFIG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONFIG.md b/CONFIG.md index 1ca43c4..640e4af 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -290,7 +290,7 @@ path matches `SOURCE`, it is renamed to `TARGET`. Example: `foo/bar --> baz` - Doesn't match `foo`, `a/foo/bar` or `foo/baz` - Converts `foo/bar` into `baz` -- Converts `foo/bar/wargl` into `bar/wargl` +- Converts `foo/bar/wargl` into `baz/wargl` Example: `foo/bar --> !` - Doesn't match `foo`, `a/foo/bar` or `foo/baz` From c0d6d8b22975234b0c9141a22307c8036698566c Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 21 Nov 2022 17:53:30 +0100 Subject: [PATCH 107/224] Use url after redirect for relative links --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 27 ++++++++++++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5e81d6..5bbefd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,9 @@ ambiguous situations. 
### Changed - Clear up error message shown when multiple paths are found to an element +### Fixed +- IPD crawler unnecessarily appending trailing slashes + ## 3.4.2 - 2022-10-26 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 338e059..c852be0 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -2,7 +2,7 @@ import os import re from dataclasses import dataclass from pathlib import PurePath -from typing import Awaitable, List, Optional, Pattern, Set, Union +from typing import Awaitable, List, Optional, Pattern, Set, Tuple, Union from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag @@ -24,9 +24,6 @@ class KitIpdCrawlerSection(HttpCrawlerSection): if not target.startswith("https://"): self.invalid_value("target", target, "Should be a URL") - if not target.endswith("/"): - target = target + "/" - return target def link_regex(self) -> Pattern[str]: @@ -102,32 +99,32 @@ class KitIpdCrawler(HttpCrawler): await self._stream_from_url(file.url, sink, bar) async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]: - page = await self.get_page() + page, url = await self.get_page() elements: List[Tag] = self._find_file_links(page) items: Set[Union[KitIpdFile, KitIpdFolder]] = set() for element in elements: folder_label = self._find_folder_label(element) if folder_label: - folder = self._extract_folder(folder_label) + folder = self._extract_folder(folder_label, url) if folder not in items: items.add(folder) folder.explain() else: - file = self._extract_file(element) + file = self._extract_file(element, url) items.add(file) log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") log.explain("Attributing it to root folder") return items - def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: + def _extract_folder(self, folder_tag: Tag, url: str) -> KitIpdFolder: files: List[KitIpdFile] = [] name = folder_tag.getText().strip() container: Tag = 
folder_tag.findNextSibling(name="table") for link in self._find_file_links(container): - files.append(self._extract_file(link)) + files.append(self._extract_file(link, url)) return KitIpdFolder(name, files) @@ -138,16 +135,16 @@ class KitIpdCrawler(HttpCrawler): return None return enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) - def _extract_file(self, link: Tag) -> KitIpdFile: - url = self._abs_url_from_link(link) + def _extract_file(self, link: Tag, url: str) -> KitIpdFile: + url = self._abs_url_from_link(url, link) name = os.path.basename(url) return KitIpdFile(name, url) def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]: return tag.findAll(name="a", attrs={"href": self._file_regex}) - def _abs_url_from_link(self, link_tag: Tag) -> str: - return urljoin(self._url, link_tag.get("href")) + def _abs_url_from_link(self, url: str, link_tag: Tag) -> str: + return urljoin(url, link_tag.get("href")) async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: async with self.session.get(url, allow_redirects=False) as resp: @@ -162,7 +159,7 @@ class KitIpdCrawler(HttpCrawler): sink.done() - async def get_page(self) -> BeautifulSoup: + async def get_page(self) -> Tuple[BeautifulSoup, str]: async with self.session.get(self._url) as request: # The web page for Algorithmen für Routenplanung contains some # weird comments that beautifulsoup doesn't parse correctly. This @@ -170,4 +167,4 @@ class KitIpdCrawler(HttpCrawler): # cause issues on other pages. 
content = (await request.read()).decode("utf-8") content = re.sub(r"", "", content) - return soupify(content.encode("utf-8")) + return soupify(content.encode("utf-8")), str(request.url) From 55a2de6b88bbd2ee0cb031271e7045f53caa1702 Mon Sep 17 00:00:00 2001 From: c0derMo Date: Fri, 25 Nov 2022 10:25:22 +0000 Subject: [PATCH 108/224] Fix crawling English opencast --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bbefd4..1dc5abc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ ambiguous situations. ### Fixed - IPD crawler unnecessarily appending trailing slashes +- Crawling opencast when ILIAS is set to English ## 3.4.2 - 2022-10-26 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 56dcf7b..c0ebdc9 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -366,7 +366,7 @@ class IliasPage: """ # Video start links are marked with an "Abspielen" link video_links: List[Tag] = self._soup.findAll( - name="a", text=re.compile(r"\s*Abspielen\s*") + name="a", text=re.compile(r"\s*(Abspielen|Play)\s*") ) results: List[IliasPageElement] = [] From 6d44aac2783c69031e7686263fc0a2285912376f Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 29 Nov 2022 18:22:19 +0100 Subject: [PATCH 109/224] Bump version to 3.4.3 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dc5abc..8793d43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. 
## Unreleased +## 3.4.3 - 2022-11-29 + ### Added - Missing documentation for `forums` option diff --git a/PFERD/version.py b/PFERD/version.py index 0ef5d89..7043d78 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.2" +VERSION = "3.4.3" From 722d2eb393913e770aff17da6b5b3b6603d1ee67 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 25 Nov 2022 12:49:36 +0100 Subject: [PATCH 110/224] Fix crawling of courses with preselected timeline tab --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8793d43..b1d18cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Fixed +- Crawling of courses with the timeline view as the default tab + ## 3.4.3 - 2022-11-29 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index c0ebdc9..44e44d9 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -158,6 +158,8 @@ class IliasPage: if self._contains_collapsed_future_meetings(): log.explain("Requesting *all* future meetings") return self._uncollapse_future_meetings_url() + if not self._is_content_tab_selected(): + return self._select_content_page_url() return None def _is_forum_page(self) -> bool: @@ -220,6 +222,27 @@ class IliasPage: link = self._abs_url_from_link(element) return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings") + def _is_content_tab_selected(self) -> bool: + return self._select_content_page_url() is None + + def _select_content_page_url(self) -> Optional[IliasPageElement]: + tab = self._soup.find( + id="tab_view_content", + attrs={"class": lambda x: x is not None and "active" not in x} + ) + # Already selected (or not found) + if not tab: + return None + link = tab.find("a") + if link: + link = self._abs_url_from_link(link) + return 
IliasPageElement(IliasElementType.FOLDER, link, "select content page") + + _unexpected_html_warning() + log.warn_contd(f"Could not find content tab URL on {self._page_url!r}.") + log.warn_contd("PFERD might not find content on the course's main page.") + return None + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. The actual video stream url is nowhere From 467fc526e8411d4a5113dbb78747aa119981c476 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 21 Mar 2023 23:52:24 +0100 Subject: [PATCH 111/224] Fix crawling of file/video cards --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1d18cd..c27059b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Fixed - Crawling of courses with the timeline view as the default tab +- Crawling of file and custom opencast cards ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 44e44d9..079cfd6 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -738,7 +738,7 @@ class IliasPage: icon: Tag = card_root.select_one(".il-card-repository-head .icon") - if "opencast" in icon["class"]: + if "opencast" in icon["class"] or "xoct" in icon["class"]: return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED if "exc" in icon["class"]: return IliasElementType.EXERCISE @@ -758,6 +758,8 @@ class IliasPage: return IliasElementType.FOLDER if "svy" in icon["class"]: return IliasElementType.SURVEY + if "file" in icon["class"]: + return IliasElementType.FILE _unexpected_html_warning() log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") From 6f30c6583d6512c92042c581e86027a4341ddc89 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 21 
Mar 2023 23:52:33 +0100 Subject: [PATCH 112/224] Fix crawling of cards without descriptions --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c27059b..7a5f654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Fixed - Crawling of courses with the timeline view as the default tab - Crawling of file and custom opencast cards +- Crawling of button cards without descriptions ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 079cfd6..efe6757 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -708,7 +708,11 @@ class IliasPage: "div", attrs={"class": lambda x: x and "caption" in x}, ) - description = caption_parent.find_next_sibling("div").getText().strip() + caption_container = caption_parent.find_next_sibling("div") + if caption_container: + description = caption_container.getText().strip() + else: + description = None if not type: _unexpected_html_warning() From 0294ceb7d5ff074dcc2566872d6b5f64f99c598f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 22 Mar 2023 00:08:19 +0100 Subject: [PATCH 113/224] Update github action versions --- .github/workflows/build-and-release.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index 090ac7e..83a36e4 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -17,9 +17,9 @@ jobs: python: ["3.9"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} @@ -45,7 +45,7 @@ jobs: run: mv dist/pferd* dist/pferd-${{ matrix.os }} - name: Upload binary - uses: actions/upload-artifact@v2 + uses: 
actions/upload-artifact@v3 with: name: Binaries path: dist/pferd-${{ matrix.os }} @@ -57,7 +57,7 @@ jobs: steps: - name: Download binaries - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: Binaries From 443f7fe83913bcb82a42d7b70d4d05df65f05278 Mon Sep 17 00:00:00 2001 From: "Mr. Pine" Date: Sat, 29 Jul 2023 17:54:42 +0200 Subject: [PATCH 114/224] Add `no-delete-prompt-overwrite` crawler conflict resolution option (#75) --- CHANGELOG.md | 3 +++ CONFIG.md | 2 ++ LICENSE | 3 ++- PFERD/output_dir.py | 11 ++++++----- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a5f654..22522e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,9 @@ ambiguous situations. - Crawling of file and custom opencast cards - Crawling of button cards without descriptions +### Added +- `no-delete-prompt-override` conflict resolution strategy + ## 3.4.3 - 2022-11-29 ### Added diff --git a/CONFIG.md b/CONFIG.md index 640e4af..84ee885 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -75,6 +75,8 @@ common to all crawlers: using `prompt` and always choosing "yes". - `no-delete`: Never delete local files, but overwrite local files if the remote file is different. + - `no-delete-prompt-overwrite`: Never delete local files, but prompt to overwrite local files if the + remote file is different. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). 
(Default: empty) - `tasks`: The maximum number of concurrent tasks (such as crawling or diff --git a/LICENSE b/LICENSE index fe2293f..d81e827 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,6 @@ Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, - TheChristophe, Scriptim, thelukasprobst, Toorero + TheChristophe, Scriptim, thelukasprobst, Toorero, + Mr-Pine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index c92f4a6..38d1288 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -44,6 +44,7 @@ class OnConflict(Enum): LOCAL_FIRST = "local-first" REMOTE_FIRST = "remote-first" NO_DELETE = "no-delete" + NO_DELETE_PROMPT_OVERWRITE = "no-delete-prompt-overwrite" @staticmethod def from_string(string: str) -> "OnConflict": @@ -51,7 +52,7 @@ class OnConflict(Enum): return OnConflict(string) except ValueError: raise ValueError("must be one of 'prompt', 'local-first'," - " 'remote-first', 'no-delete'") + " 'remote-first', 'no-delete', 'no-delete-prompt-overwrite'") @dataclass @@ -264,7 +265,7 @@ class OutputDirectory: on_conflict: OnConflict, path: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Replace {fmt_path(path)} with remote file?" return await prompt_yes_no(prompt, default=False) @@ -283,7 +284,7 @@ class OutputDirectory: on_conflict: OnConflict, path: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?" 
return await prompt_yes_no(prompt, default=False) @@ -303,7 +304,7 @@ class OutputDirectory: path: PurePath, parent: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?" return await prompt_yes_no(prompt, default=False) @@ -330,7 +331,7 @@ class OutputDirectory: return False elif on_conflict == OnConflict.REMOTE_FIRST: return True - elif on_conflict == OnConflict.NO_DELETE: + elif on_conflict in {OnConflict.NO_DELETE, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: return False # This should never be reached From d204dac8ced63534ca2b4596e9a63c880b2077a3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 2 Jun 2023 18:19:39 +0200 Subject: [PATCH 115/224] Detect unexpected root page redirects and abort operation --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 10 ++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 20 ++++++++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22522e2..ee55659 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ ambiguous situations. 
- Crawling of courses with the timeline view as the default tab - Crawling of file and custom opencast cards - Crawling of button cards without descriptions +- Abort crawling when encountering an unexpected ilias root page redirect ### Added - `no-delete-prompt-override` conflict resolution strategy diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index efe6757..aed2069 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -79,6 +79,16 @@ class IliasPage: self._page_type = source_element.type if source_element else None self._source_name = source_element.name if source_element else "" + @staticmethod + def is_root_page(soup: BeautifulSoup) -> bool: + permalink = soup.find(id="current_perma_link") + if permalink is None: + return False + value = permalink.attrs.get("value") + if value is None: + return False + return "goto.php?target=root_" in value + def get_child_elements(self) -> List[IliasPageElement]: """ Return all child page elements you can find here. diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index e3719b8..ae49edc 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -239,7 +239,7 @@ instance's greatest bottleneck. # Duplicated code, but the root page is special - we want to avoid fetching it twice! while next_stage_url: - soup = await self._get_page(next_stage_url) + soup = await self._get_page(next_stage_url, root_page_allowed=True) if current_parent is None and expected_id is not None: perma_link_element: Tag = soup.find(id="current_perma_link") @@ -739,12 +739,12 @@ instance's greatest bottleneck. 
sink.file.write(content.encode("utf-8")) sink.done() - async def _get_page(self, url: str) -> BeautifulSoup: + async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup: auth_id = await self._current_auth_id() async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): - return soup + return self._verify_page(soup, url, root_page_allowed) # We weren't authenticated, so try to do that await self.authenticate(auth_id) @@ -753,9 +753,21 @@ instance's greatest bottleneck. async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): - return soup + return self._verify_page(soup, url, root_page_allowed) raise CrawlError("get_page failed even after authenticating") + def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup: + if IliasPage.is_root_page(soup) and not root_page_allowed: + raise CrawlError( + "Unexpectedly encountered ILIAS root page. " + "This usually happens because the ILIAS instance is broken. " + "If so, wait a day or two and try again. " + "It could also happen because a crawled element links to the ILIAS root page. " + "If so, use a transform with a ! as target to ignore the particular element. 
" + f"The redirect came from {url}" + ) + return soup + async def _post_authenticated( self, url: str, From 123a57beec37090310f76df3746e6ce107ceb299 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Jul 2023 18:14:57 +0200 Subject: [PATCH 116/224] Fix mypy unreachable error in file_templates --- PFERD/crawl/ilias/file_templates.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 151a41b..59123a2 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -102,24 +102,24 @@ class Links(Enum): INTERNET_SHORTCUT = "internet-shortcut" def template(self) -> Optional[str]: - if self == self.FANCY: + if self == Links.FANCY: return _link_template_fancy - elif self == self.PLAINTEXT: + elif self == Links.PLAINTEXT: return _link_template_plain - elif self == self.INTERNET_SHORTCUT: + elif self == Links.INTERNET_SHORTCUT: return _link_template_internet_shortcut - elif self == self.IGNORE: + elif self == Links.IGNORE: return None raise ValueError("Missing switch case") def extension(self) -> Optional[str]: - if self == self.FANCY: + if self == Links.FANCY: return ".html" - elif self == self.PLAINTEXT: + elif self == Links.PLAINTEXT: return ".txt" - elif self == self.INTERNET_SHORTCUT: + elif self == Links.INTERNET_SHORTCUT: return ".url" - elif self == self.IGNORE: + elif self == Links.IGNORE: return None raise ValueError("Missing switch case") From 68c398f1fea5cfefd86d11e79f2f6582d50e6563 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Jul 2023 23:23:10 +0200 Subject: [PATCH 117/224] Add support for ILIAS learning modules --- CHANGELOG.md | 1 + PFERD/crawl/ilias/file_templates.py | 69 +++++++++ PFERD/crawl/ilias/ilias_html_cleaner.py | 2 +- PFERD/crawl/ilias/kit_ilias_html.py | 46 ++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 160 ++++++++++++++++++++- 5 files changed, 272 insertions(+), 6 deletions(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index ee55659..6e3925c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ ambiguous situations. ### Added - `no-delete-prompt-override` conflict resolution strategy +- support for ILIAS learning modules ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 59123a2..b206461 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -1,6 +1,10 @@ from enum import Enum from typing import Optional +import bs4 + +from PFERD.utils import soupify + _link_template_plain = "{{link}}" _link_template_fancy = """ @@ -94,6 +98,71 @@ _link_template_internet_shortcut = """ URL={{link}} """.strip() +_learning_module_template = """ + + + + + {{name}} + + + + +{{body}} + + +""" + + +def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str: + # Seems to be comments, ignore those. + for elem in body.select(".il-copg-mob-fullscreen-modal"): + elem.decompose() + + nav_template = """ + + """ + if prev and body.select_one(".ilc_page_lnav_LeftNavigation"): + text = body.select_one(".ilc_page_lnav_LeftNavigation").getText().strip() + left = f'{text}' + else: + left = "" + + if next and body.select_one(".ilc_page_rnav_RightNavigation"): + text = body.select_one(".ilc_page_rnav_RightNavigation").getText().strip() + right = f'{text}' + else: + right = "" + + if top_nav := body.select_one(".ilc_page_tnav_TopNavigation"): + top_nav.replace_with( + soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode()) + ) + + if bot_nav := body.select_one(".ilc_page_bnav_BottomNavigation"): + bot_nav.replace_with(soupify(nav_template.replace( + "{{left}}", left).replace("{{right}}", right).encode()) + ) + + body = body.prettify() + return _learning_module_template.replace("{{body}}", body).replace("{{name}}", name) + class Links(Enum): IGNORE = "ignore" diff --git 
a/PFERD/crawl/ilias/ilias_html_cleaner.py b/PFERD/crawl/ilias/ilias_html_cleaner.py index 5952309..5495304 100644 --- a/PFERD/crawl/ilias/ilias_html_cleaner.py +++ b/PFERD/crawl/ilias/ilias_html_cleaner.py @@ -82,7 +82,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup: dummy.decompose() if len(children) > 1: continue - if type(children[0]) == Comment: + if isinstance(type(children[0]), Comment): dummy.decompose() for hrule_imposter in soup.find_all(class_="ilc_section_Separator"): diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index aed2069..46a8073 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -22,6 +22,7 @@ class IliasElementType(Enum): FOLDER = "folder" FORUM = "forum" LINK = "link" + LEARNING_MODULE = "learning_module" BOOKING = "booking" MEETING = "meeting" SURVEY = "survey" @@ -71,6 +72,14 @@ class IliasForumThread: mtime: Optional[datetime] +@dataclass +class IliasLearningModulePage: + title: str + content: Tag + next_url: Optional[str] + previous_url: Optional[str] + + class IliasPage: def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): @@ -136,6 +145,34 @@ class IliasPage: return BeautifulSoup(raw_html, "html.parser") + def get_learning_module_data(self) -> Optional[IliasLearningModulePage]: + if not self._is_learning_module_page(): + return None + content = self._soup.select_one("#ilLMPageContent") + title = self._soup.select_one(".ilc_page_title_PageTitle").getText().strip() + return IliasLearningModulePage( + title=title, + content=content, + next_url=self._find_learning_module_next(), + previous_url=self._find_learning_module_prev() + ) + + def _find_learning_module_next(self) -> Optional[str]: + for link in self._soup.select("a.ilc_page_rnavlink_RightNavigationLink"): + url = self._abs_url_from_link(link) + if "baseClass=ilLMPresentationGUI" not in url: + continue + return url + return None + + def 
_find_learning_module_prev(self) -> Optional[str]: + for link in self._soup.select("a.ilc_page_lnavlink_LeftNavigationLink"): + url = self._abs_url_from_link(link) + if "baseClass=ilLMPresentationGUI" not in url: + continue + return url + return None + def get_download_forum_data(self) -> Optional[IliasDownloadForumData]: form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x}) if not form: @@ -222,6 +259,12 @@ class IliasPage: return False return "target=copa_" in link.get("value") + def _is_learning_module_page(self) -> bool: + link = self._soup.find(id="current_perma_link") + if not link: + return False + return "target=pg_" in link.get("value") + def _contains_collapsed_future_meetings(self) -> bool: return self._uncollapse_future_meetings_url() is not None @@ -812,6 +855,9 @@ class IliasPage: if "cmdClass=ilobjtestgui" in parsed_url.query: return IliasElementType.TEST + if "baseClass=ilLMPresentationGUI" in parsed_url.query: + return IliasElementType.LEARNING_MODULE + # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so # try to guess it from the image. 
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index ae49edc..f82d684 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -1,8 +1,11 @@ import asyncio +import base64 +import os import re from collections.abc import Awaitable, Coroutine from pathlib import PurePath -from typing import Any, Callable, Dict, List, Optional, Set, Union, cast +from typing import Any, Callable, Dict, List, Literal, Optional, Set, Union, cast +from urllib.parse import urljoin import aiohttp import yarl @@ -16,10 +19,10 @@ from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection -from .file_templates import Links +from .file_templates import Links, learning_module_template from .ilias_html_cleaner import clean, insert_base_markup -from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement, - _sanitize_path_name, parse_ilias_forum_export) +from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage, + IliasPageElement, _sanitize_path_name, parse_ilias_forum_export) TargetType = Union[str, int] @@ -394,6 +397,8 @@ instance's greatest bottleneck. "[bright_black](surveys contain no relevant data)" ) return None + elif element.type == IliasElementType.LEARNING_MODULE: + return await self._handle_learning_module(element, element_path) elif element.type == IliasElementType.LINK: return await self._handle_link(element, element_path) elif element.type == IliasElementType.BOOKING: @@ -739,6 +744,135 @@ instance's greatest bottleneck. 
sink.file.write(content.encode("utf-8")) sink.done() + async def _handle_learning_module( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Coroutine[Any, Any, None]]: + maybe_cl = await self.crawl(element_path) + if not maybe_cl: + return None + return self._crawl_learning_module(element, maybe_cl) + + @_iorepeat(3, "crawling learning module") + @anoncritical + async def _crawl_learning_module(self, element: IliasPageElement, cl: CrawlToken) -> None: + elements: List[IliasLearningModulePage] = [] + + async with cl: + log.explain_topic(f"Parsing initial HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {element.url}") + soup = await self._get_page(element.url) + page = IliasPage(soup, element.url, None) + if next := page.get_learning_module_data(): + elements.extend(await self._crawl_learning_module_direction( + cl.path, next.previous_url, "left" + )) + elements.append(next) + elements.extend(await self._crawl_learning_module_direction( + cl.path, next.next_url, "right" + )) + + # Reflect their natural ordering in the file names + for index, lm_element in enumerate(elements): + lm_element.title = f"{index:02}_{lm_element.title}" + + tasks: List[Awaitable[None]] = [] + for index, elem in enumerate(elements): + prev_url = elements[index - 1].title if index > 0 else None + next_url = elements[index + 1].title if index < len(elements) - 1 else None + tasks.append(asyncio.create_task( + self._download_learning_module_page(cl.path, elem, prev_url, next_url) + )) + + # And execute them + await self.gather(tasks) + + async def _crawl_learning_module_direction( + self, + path: PurePath, + start_url: Optional[str], + dir: Union[Literal["left"], Literal["right"]] + ) -> List[IliasLearningModulePage]: + elements: List[IliasLearningModulePage] = [] + + if not start_url: + return elements + + next_element_url: Optional[str] = start_url + counter = 0 + while next_element_url: + log.explain_topic(f"Parsing HTML page for {fmt_path(path)} 
({dir}-{counter})") + log.explain(f"URL: {next_element_url}") + soup = await self._get_page(next_element_url) + page = IliasPage(soup, next_element_url, None) + if next := page.get_learning_module_data(): + elements.append(next) + if dir == "left": + next_element_url = next.previous_url + else: + next_element_url = next.next_url + counter += 1 + + return elements + + @anoncritical + @_iorepeat(3, "saving learning module page") + async def _download_learning_module_page( + self, + parent_path: PurePath, + element: IliasLearningModulePage, + prev: Optional[str], + next: Optional[str] + ) -> None: + path = parent_path / (_sanitize_path_name(element.title) + ".html") + maybe_dl = await self.download(path) + if not maybe_dl: + return + my_path = self._transformer.transform(maybe_dl.path) + if not my_path: + return + + if prev: + prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html")) + if prev_p: + prev = os.path.relpath(prev_p, my_path.parent) + else: + prev = None + if next: + next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html")) + if next_p: + next = os.path.relpath(next_p, my_path.parent) + else: + next = None + + async with maybe_dl as (bar, sink): + content = element.content + content = await self.internalize_images(content) + sink.file.write(learning_module_template(content, maybe_dl.path.name, prev, next).encode("utf-8")) + sink.done() + + async def internalize_images(self, tag: Tag) -> Tag: + """ + Tries to fetch ILIAS images and embed them as base64 data. 
+ """ + log.explain_topic("Internalizing images") + for elem in tag.find_all(recursive=True): + if not isinstance(elem, Tag): + continue + if elem.name == "img": + if src := elem.attrs.get("src", None): + url = urljoin(_ILIAS_URL, src) + if not url.startswith(_ILIAS_URL): + continue + log.explain(f"Internalizing {url!r}") + img = await self._get_authenticated(url) + elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode() + if elem.name == "iframe" and elem.attrs.get("src", "").startswith("//"): + # For unknown reasons the protocol seems to be stripped. + elem.attrs["src"] = "https:" + elem.attrs["src"] + return tag + async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup: auth_id = await self._current_auth_id() async with self.session.get(url) as request: @@ -772,7 +906,7 @@ instance's greatest bottleneck. self, url: str, data: dict[str, Union[str, List[str]]] - ) -> BeautifulSoup: + ) -> bytes: auth_id = await self._current_auth_id() form_data = aiohttp.FormData() @@ -792,6 +926,22 @@ instance's greatest bottleneck. return await request.read() raise CrawlError("post_authenticated failed even after authenticating") + async def _get_authenticated(self, url: str) -> bytes: + auth_id = await self._current_auth_id() + + async with self.session.get(url, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + + # We weren't authenticated, so try to do that + await self.authenticate(auth_id) + + # Retry once after authenticating. If this fails, we will die. + async with self.session.get(url, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + raise CrawlError("get_authenticated failed even after authenticating") + # We repeat this as the login method in shibboleth doesn't handle I/O errors. # Shibboleth is quite reliable as well, the repeat is likely not critical here. 
@ _iorepeat(3, "Login", failure_is_error=True) From dbc2553b119c39c7a8ad196c6858fc8109f746a9 Mon Sep 17 00:00:00 2001 From: "Mr. Pine" <50425705+Mr-Pine@users.noreply.github.com> Date: Wed, 15 Mar 2023 15:33:42 +0100 Subject: [PATCH 118/224] Add default `show-not-deleted` option If set to `no`, PFERD won't print status or report messages for not deleted files --- CHANGELOG.md | 3 +++ CONFIG.md | 8 ++++++-- PFERD/__main__.py | 4 ++++ PFERD/cli/parser.py | 7 +++++++ PFERD/config.py | 3 +++ PFERD/logging.py | 20 ++++++++++++++++++++ PFERD/output_dir.py | 2 +- PFERD/pferd.py | 2 +- 8 files changed, 45 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e3925c..85513d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,9 @@ ambiguous situations. ### Added - `no-delete-prompt-override` conflict resolution strategy - support for ILIAS learning modules +- `show_not_deleted` option to stop printing the "Not Deleted" status or report + message. This combines nicely with the `no-delete-prompt-override` strategy, + causing PFERD to mostly ignore local-only files. ## 3.4.3 - 2022-11-29 diff --git a/CONFIG.md b/CONFIG.md index 84ee885..5f62749 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -26,6 +26,9 @@ default values for the other sections. `Added ...`) while running a crawler. (Default: `yes`) - `report`: Whether PFERD should print a report of added, changed and deleted local files for all crawlers before exiting. (Default: `yes`) +- `show_not_deleted`: Whether PFERD should print messages in status and report + when a local-only file wasn't deleted. Combines nicely with the + `no-delete-prompt-override` conflict resolution strategy. - `share_cookies`: Whether crawlers should share cookies where applicable. For example, some crawlers share cookies if they crawl the same website using the same account. (Default: `yes`) @@ -75,8 +78,9 @@ common to all crawlers: using `prompt` and always choosing "yes". 
- `no-delete`: Never delete local files, but overwrite local files if the remote file is different. - - `no-delete-prompt-overwrite`: Never delete local files, but prompt to overwrite local files if the - remote file is different. + - `no-delete-prompt-overwrite`: Never delete local files, but prompt to + overwrite local files if the remote file is different. Combines nicely + with the `show_not_deleted` option. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) - `tasks`: The maximum number of concurrent tasks (such as crawling or diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 4faeb13..cb8c67c 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -47,6 +47,8 @@ def configure_logging_from_args(args: argparse.Namespace) -> None: log.output_explain = args.explain if args.status is not None: log.output_status = args.status + if args.show_not_deleted is not None: + log.output_not_deleted = args.show_not_deleted if args.report is not None: log.output_report = args.report @@ -72,6 +74,8 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N log.output_status = config.default_section.status() if args.report is None: log.output_report = config.default_section.report() + if args.show_not_deleted is None: + log.output_not_deleted = config.default_section.show_not_deleted() except ConfigOptionError as e: log.error(str(e)) sys.exit(1) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index e753023..be483fd 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -215,6 +215,11 @@ PARSER.add_argument( action=BooleanOptionalAction, help="whether crawlers should share cookies where applicable" ) +PARSER.add_argument( + "--show-not-deleted", + action=BooleanOptionalAction, + help="print messages in status and report when PFERD did not delete a local only file" +) def load_default_section( @@ -233,6 +238,8 @@ def 
load_default_section( section["report"] = "yes" if args.report else "no" if args.share_cookies is not None: section["share_cookies"] = "yes" if args.share_cookies else "no" + if args.show_not_deleted is not None: + section["show_not_deleted"] = "yes" if args.show_not_deleted else "no" SUBPARSERS = PARSER.add_subparsers(title="crawlers") diff --git a/PFERD/config.py b/PFERD/config.py index 8f7e682..b2cff4e 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -82,6 +82,9 @@ class DefaultSection(Section): def report(self) -> bool: return self.s.getboolean("report", fallback=True) + def show_not_deleted(self) -> bool: + return self.s.getboolean("show_not_deleted", fallback=True) + def share_cookies(self) -> bool: return self.s.getboolean("share_cookies", fallback=True) diff --git a/PFERD/logging.py b/PFERD/logging.py index 340b21f..b958fb2 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -59,6 +59,7 @@ class Log: # Whether different parts of the output are enabled or disabled self.output_explain = False self.output_status = True + self.output_not_deleted = True self.output_report = True def _update_live(self) -> None: @@ -207,6 +208,17 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new action = escape(f"{action:<{self.STATUS_WIDTH}}") self.print(f"{style}{action}[/] {escape(text)} {suffix}") + def not_deleted(self, style: str, action: str, text: str, suffix: str = "") -> None: + """ + Print a message for a local only file that wasn't + deleted while crawling. Allows markup in the "style" + argument which will be applied to the "action" string. + """ + + if self.output_status and self.output_not_deleted: + action = escape(f"{action:<{self.STATUS_WIDTH}}") + self.print(f"{style}{action}[/] {escape(text)} {suffix}") + def report(self, text: str) -> None: """ Print a report after crawling. Allows markup. 
@@ -215,6 +227,14 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_report: self.print(text) + def report_not_deleted(self, text: str) -> None: + """ + Print a report for a local only file that wasn't deleted after crawling. Allows markup. + """ + + if self.output_report and self.output_not_deleted: + self.print(text) + @contextmanager def _bar( self, diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 38d1288..e9e9b93 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -496,7 +496,7 @@ class OutputDirectory: except OSError: pass else: - log.status("[bold bright_magenta]", "Not deleted", fmt_path(pure)) + log.not_deleted("[bold bright_magenta]", "Not deleted", fmt_path(pure)) self._report.not_delete_file(pure) def load_prev_report(self) -> None: diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 079053b..b30a04a 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -180,7 +180,7 @@ class Pferd: log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}") for path in sorted(crawler.report.not_deleted_files): something_changed = True - log.report(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") + log.report_not_deleted(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") for warning in crawler.report.encountered_warnings: something_changed = True From b3d412360baeed6992535e6957d0bc1e368c337f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 26 Aug 2023 23:48:14 +0200 Subject: [PATCH 119/224] Add Nix flake --- flake.lock | 27 +++++++++++++++++++++++++++ flake.nix | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..914c58b --- /dev/null +++ b/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1692986144, + "narHash": "sha256-M4VFpy7Av9j+33HF5nIGm0k2+DXXW4qSSKdidIKg5jY=", + "owner": 
"NixOS", + "repo": "nixpkgs", + "rev": "74e5bdc5478ebbe7ba5849f0d765f92757bb9dbf", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-23.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..e3d52af --- /dev/null +++ b/flake.nix @@ -0,0 +1,41 @@ +{ + description = "Tool for downloading course-related files from ILIAS"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-23.05"; + }; + + outputs = { self, nixpkgs }: + let + # Helper function to generate an attrset '{ x86_64-linux = f "x86_64-linux"; ... }'. + forAllSystems = nixpkgs.lib.genAttrs nixpkgs.lib.systems.flakeExposed; + in + { + packages = forAllSystems (system: + let pkgs = import nixpkgs { inherit system; }; + in + rec { + default = pkgs.python3Packages.buildPythonApplication rec { + pname = "pferd"; + # Performing black magic + # Don't worry, I sacrificed enough goats for the next few years + version = (pkgs.lib.importTOML ./PFERD/version.py).VERSION; + format = "pyproject"; + + src = ./.; + + nativeBuildInputs = with pkgs.python3Packages; [ + setuptools + ]; + + propagatedBuildInputs = with pkgs.python3Packages; [ + aiohttp + beautifulsoup4 + rich + keyring + certifi + ]; + }; + }); + }; +} From 2184ac804018e836e439e365ae2b0d184adae26d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 26 Aug 2023 19:39:40 +0200 Subject: [PATCH 120/224] Add support for ILIAS mediacast listings --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 110 +++++++++++++++------ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 45 +++++---- 3 files changed, 107 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85513d2..d58ea18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ ambiguous situations. 
- `show_not_deleted` option to stop printing the "Not Deleted" status or report message. This combines nicely with the `no-delete-prompt-override` strategy, causing PFERD to mostly ignore local-only files. +- support for mediacast video listings ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 46a8073..d5ea76d 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from datetime import date, datetime, timedelta from enum import Enum -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, cast from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup, Tag @@ -26,10 +26,12 @@ class IliasElementType(Enum): BOOKING = "booking" MEETING = "meeting" SURVEY = "survey" - VIDEO = "video" - VIDEO_PLAYER = "video_player" - VIDEO_FOLDER = "video_folder" - VIDEO_FOLDER_MAYBE_PAGINATED = "video_folder_maybe_paginated" + MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder" + MEDIACAST_VIDEO = "mediacast_video" + OPENCAST_VIDEO = "opencast_video" + OPENCAST_VIDEO_PLAYER = "opencast_video_player" + OPENCAST_VIDEO_FOLDER = "opencast_video_folder" + OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED = "opencast_video_folder_maybe_paginated" @dataclass @@ -45,7 +47,8 @@ class IliasPageElement: r"eid=(?P[0-9a-z\-]+)", r"file_(?P\d+)", r"ref_id=(?P\d+)", - r"target=[a-z]+_(?P\d+)" + r"target=[a-z]+_(?P\d+)", + r"mm_(?P\d+)" ] for regex in regexes: @@ -105,9 +108,9 @@ class IliasPage: if self._is_video_player(): log.explain("Page is a video player, extracting URL") return self._player_to_video() - if self._is_video_listing(): - log.explain("Page is a video listing, searching for elements") - return self._find_video_entries() + if self._is_opencast_video_listing(): + log.explain("Page is an opencast video listing, searching for elements") + return self._find_opencast_video_entries() if 
self._is_exercise_file(): log.explain("Page is an exercise, searching for elements") return self._find_exercise_entries() @@ -199,9 +202,9 @@ class IliasPage: if self._is_ilias_opencast_embedding(): log.explain("Unwrapping opencast embedding") return self.get_child_elements()[0] - if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: + if self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED: log.explain("Unwrapping video pagination") - return self._find_video_entries_paginated()[0] + return self._find_opencast_video_entries_paginated()[0] if self._contains_collapsed_future_meetings(): log.explain("Requesting *all* future meetings") return self._uncollapse_future_meetings_url() @@ -219,7 +222,7 @@ class IliasPage: def _is_video_player(self) -> bool: return "paella_config_file" in str(self._soup) - def _is_video_listing(self) -> bool: + def _is_opencast_video_listing(self) -> bool: if self._is_ilias_opencast_embedding(): return True @@ -319,14 +322,14 @@ class IliasPage: # and just fetch the lone video url! if len(streams) == 1: video_url = streams[0]["sources"]["mp4"][0]["src"] - return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, self._source_name)] log.explain(f"Found multiple videos for stream at {self._source_name}") items = [] for stream in sorted(streams, key=lambda stream: stream["content"]): full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4" video_url = stream["sources"]["mp4"][0]["src"] - items.append(IliasPageElement(IliasElementType.VIDEO, video_url, full_name)) + items.append(IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, full_name)) return items @@ -385,7 +388,7 @@ class IliasPage: return items - def _find_video_entries(self) -> List[IliasPageElement]: + def _find_opencast_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages # 1. 
The initial dummy page without any videos. This page contains the link to the listing # 2. The video listing which might be paginated @@ -405,27 +408,27 @@ class IliasPage: query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} url = url_set_query_params(url, query_params) log.explain("Found ILIAS video frame page, fetching actual content next") - return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None - if is_paginated and not self._page_type == IliasElementType.VIDEO_FOLDER: + if is_paginated and not self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER: # We are in stage 2 - try to break pagination - return self._find_video_entries_paginated() + return self._find_opencast_video_entries_paginated() - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() - def _find_video_entries_paginated(self) -> List[IliasPageElement]: + def _find_opencast_video_entries_paginated(self) -> List[IliasPageElement]: table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) if table_element is None: log.warn("Couldn't increase elements per page (table not found). I might miss elements.") - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"]) if id_match is None: log.warn("Couldn't increase elements per page (table id not found). 
I might miss elements.") - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() table_id = id_match.group(1) @@ -434,9 +437,9 @@ class IliasPage: url = url_set_query_params(self._page_url, query_params) log.explain("Disabled pagination, retrying folder as a new entry") - return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER, url, "")] - def _find_video_entries_no_paging(self) -> List[IliasPageElement]: + def _find_opencast_video_entries_no_paging(self) -> List[IliasPageElement]: """ Crawls the "second stage" video page. This page contains the actual video urls. """ @@ -448,11 +451,11 @@ class IliasPage: results: List[IliasPageElement] = [] for link in video_links: - results.append(self._listed_video_to_element(link)) + results.append(self._listed_opencast_video_to_element(link)) return results - def _listed_video_to_element(self, link: Tag) -> IliasPageElement: + def _listed_opencast_video_to_element(self, link: Tag) -> IliasPageElement: # The link is part of a table with multiple columns, describing metadata. # 6th or 7th child (1 indexed) is the modification time string. 
Try to find it # by parsing backwards from the end and finding something that looks like a date @@ -479,7 +482,9 @@ class IliasPage: video_url = self._abs_url_from_link(link) log.explain(f"Found video {video_name!r} at {video_url}") - return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) + return IliasPageElement( + IliasElementType.OPENCAST_VIDEO_PLAYER, video_url, video_name, modification_time + ) def _find_exercise_entries(self) -> List[IliasPageElement]: if self._soup.find(id="tab_submission"): @@ -622,9 +627,48 @@ class IliasPage: result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) result += self._find_cards() + result += self._find_mediacast_videos() return result + def _find_mediacast_videos(self) -> List[IliasPageElement]: + videos: List[IliasPageElement] = [] + + for elem in cast(List[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")): + element_name = _sanitize_path_name( + elem.select_one(".ilPlayerPreviewDescription").getText().strip() + ) + if not element_name.endswith(".mp4"): + # just to make sure it has some kinda-alrightish ending + element_name = element_name + ".mp4" + video_element = elem.find(name="video") + if not video_element: + _unexpected_html_warning() + log.warn_contd(f"No