From b8fe25c580a8cafc14c32890f0635c7daecafc4d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 4 May 2022 14:13:39 +0200 Subject: [PATCH 001/147] Add `.cpp` to ipd link regex --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 310059a..22fdd29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Changed +- Add `.cpp` to IPD link regex + ## 3.4.0 - 2022-05-01 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 1a5314b..e5ec58f 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -27,7 +27,7 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target def link_regex(self) -> Pattern[str]: - regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|java)$") + regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$") return re.compile(regex) From afbd03f7774a1c0f22c471d98f995153bb08edcd Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:15:48 +0200 Subject: [PATCH 002/147] Fix docs --- CHANGELOG.md | 2 +- CONFIG.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22fdd29..f5af29d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ ambiguous situations. ## Unreleased ### Changed -- Add `.cpp` to IPD link regex +- Add `cpp` extension to default `link_regex` of IPD crawler ## 3.4.0 - 2022-05-01 diff --git a/CONFIG.md b/CONFIG.md index 569780d..1355c34 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -146,7 +146,7 @@ requests is likely a good idea. - `target`: URL to a KIT-IPD page - `link_regex`: A regex that is matched against the `href` part of links. If it matches, the given link is downloaded as a file. This is used to extract - files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|java)$`) + files from KIT-IPD pages. 
(Default: `^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$`) ### The `kit-ilias-web` crawler From bc3fa36637b5a4f4ea26db1a9437e4cbd5cad5c4 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:20:45 +0200 Subject: [PATCH 003/147] Fix IPD crawler crashing on weird HTML comments --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5af29d..de7b795 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler +### Fixed +- IPD crawler crashes on some sites + ## 3.4.0 - 2022-05-01 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index e5ec58f..58e71f8 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -161,4 +161,10 @@ class KitIpdCrawler(HttpCrawler): async def get_page(self) -> BeautifulSoup: async with self.session.get(self._url) as request: - return soupify(await request.read()) + # The web page for Algorithmen für Routenplanung contains some + # weird comments that beautifulsoup doesn't parse correctly. This + # hack enables those pages to be crawled, and should hopefully not + # cause issues on other pages. + content = (await request.read()).decode("utf-8") + content = re.sub(r"", "", content) + return soupify(content.encode("utf-8")) From af2cc1169ace7154349518f7f709023eeb76ba95 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:23:19 +0200 Subject: [PATCH 004/147] Mention href for users of link_regex option --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de7b795..959fda0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. 
### Changed - Add `cpp` extension to default `link_regex` of IPD crawler +- Mention hrefs in IPD crawler for users of `link_regex` option ### Fixed - IPD crawler crashes on some sites diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 58e71f8..78fe0b1 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -45,7 +45,7 @@ class KitIpdFolder: def explain(self) -> None: log.explain_topic(f"Folder {self.name!r}") for file in self.files: - log.explain(f"File {file.name!r}") + log.explain(f"File {file.name!r} (href={file.url!r})") def __hash__(self) -> int: return self.name.__hash__() @@ -113,7 +113,7 @@ class KitIpdCrawler(HttpCrawler): else: file = self._extract_file(element) items.add(file) - log.explain_topic(f"Orphan file {file.name!r}") + log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") log.explain("Attributing it to root folder") return items From 694ffb4d7711265d768a636cf1843e302485c62d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:28:30 +0200 Subject: [PATCH 005/147] Fix meeting date parsing Apparently the new pattern ": ," was added. This patch adds support for it. 
--- PFERD/crawl/ilias/kit_ilias_html.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 94b2e4b..dfe111d 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -763,9 +763,14 @@ def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[dateti """ try: date_str = re.sub(r"\s+", " ", date_str) + date_str = re.sub("(Gestern|Yesterday):", "", date_str, re.I) + date_str = re.sub("(Heute|Today):", "", date_str, re.I) + date_str = re.sub("(Morgen|Tomorrow):", "", date_str, re.I) + date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) date_str = re.sub("Morgen|Tomorrow", _format_date_english(_tomorrow()), date_str, re.I) + date_str = date_str.strip() for german, english in zip(german_months, english_months): date_str = date_str.replace(german, english) # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" From bcc537468c46088f78a037fb28364866e8653bb5 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:53:37 +0200 Subject: [PATCH 006/147] Fix crawling of expanded meetings The last meeting on every page is expanded by default. Its content is then shown inline *and* in the meeting page itself. We should skip the inline content. --- PFERD/crawl/ilias/kit_ilias_html.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index dfe111d..d93684c 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -428,6 +428,12 @@ class IliasPage: element_type = self._find_type_from_link(element_name, link, abs_url) description = self._find_link_description(link) + # The last meeting on every page is expanded by default. 
+ # Its content is then shown inline *and* in the meeting page itself. + # We should skip the inline content. + if element_type != IliasElementType.MEETING and self._is_in_expanded_meeting(link): + continue + if not element_type: continue if element_type == IliasElementType.MEETING: @@ -445,6 +451,26 @@ class IliasPage: return result + def _is_in_expanded_meeting(self, tag: Tag) -> bool: + """ + Returns whether a file is part of an expanded meeting. + Has false positives for meetings themselves as their title is also "in the expanded meeting content". + It is in the same general div and this whole thing is guesswork. + Therefore, you should check for meetings before passing them in this function. + """ + parents: List[Tag] = list(tag.parents) + for parent in parents: + if not parent.get("class"): + continue + + # We should not crawl files under meetings + if "ilContainerListItemContentCB" in parent.get("class"): + link: Tag = parent.parent.find("a") + type = IliasPage._find_type_from_folder_like(link, self._page_url) + return type == IliasElementType.MEETING + + return False + def _find_upwards_folder_hierarchy(self, tag: Tag) -> List[str]: """ Interprets accordions and expandable blocks as virtual folders and returns them From 2f0e04ce13ebbc7c7ccaa93e03d8f707f246ceef Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Thu, 5 May 2022 22:57:55 +0200 Subject: [PATCH 007/147] Adjust changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 959fda0..4249287 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ ambiguous situations. 
### Fixed - IPD crawler crashes on some sites +- Meeting name normalization for yesterday, today and tomorrow fails +- Crawling of meeting file previews ## 3.4.0 - 2022-05-01 From 616b0480f7c92afe11c36d2c105c99ba5f960e96 Mon Sep 17 00:00:00 2001 From: Joscha Date: Sun, 8 May 2022 17:39:18 +0200 Subject: [PATCH 008/147] Simplify IPD crawler link regex --- CHANGELOG.md | 5 +++-- CONFIG.md | 2 +- PFERD/crawl/kit_ipd_crawler.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4249287..e2d3840 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,11 +24,12 @@ ambiguous situations. ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler -- Mention hrefs in IPD crawler for users of `link_regex` option +- Mention hrefs in IPD crawler's `--explain` output for users of `link_regex` option +- Simplify default IPD crawler `link_regex` ### Fixed - IPD crawler crashes on some sites -- Meeting name normalization for yesterday, today and tomorrow fails +- Meeting name normalization for yesterday, today and tomorrow - Crawling of meeting file previews ## 3.4.0 - 2022-05-01 diff --git a/CONFIG.md b/CONFIG.md index 1355c34..f572a80 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -146,7 +146,7 @@ requests is likely a good idea. - `target`: URL to a KIT-IPD page - `link_regex`: A regex that is matched against the `href` part of links. If it matches, the given link is downloaded as a file. This is used to extract - files from KIT-IPD pages. (Default: `^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$`) + files from KIT-IPD pages. 
(Default: `^.*?[^/]+\.(pdf|zip|c|cpp|java)$`) ### The `kit-ilias-web` crawler diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 78fe0b1..d9fac32 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -27,7 +27,7 @@ class KitIpdCrawlerSection(HttpCrawlerSection): return target def link_regex(self) -> Pattern[str]: - regex = self.s.get("link_regex", r"^.*/[^/]*\.(?:pdf|zip|c|cpp|java)$") + regex = self.s.get("link_regex", r"^.*?[^/]+\.(pdf|zip|c|cpp|java)$") return re.compile(regex) From a5015fe9b16d484613a27687f2c122b15e109ba2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 8 May 2022 23:21:18 +0200 Subject: [PATCH 009/147] Correctly parse day-only meeting dates I failed to recognize the correct format in the previous adjustment, so this (hopefully) fixes it for good. Meetings apparently don't always have a time portion. --- PFERD/crawl/ilias/kit_ilias_html.py | 48 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d93684c..6d063b6 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -746,17 +746,26 @@ class IliasPage: Normalizes meeting names, which have a relative time as their first part, to their date in ISO format. """ - date_portion_str = meeting_name.split(" - ")[0] + + # This checks whether we can reach a `:` without passing a `-` + if re.search(r"^[^-]+: ", meeting_name): + # Meeting name only contains date: "05. Jan 2000:" + split_delimiter = ":" + else: + # Meeting name contains date and start/end times: "05. 
Jan 2000, 16:00 - 17:30:" + split_delimiter = ", " + + # We have a meeting day without time + date_portion_str = meeting_name.split(split_delimiter)[0] date_portion = demangle_date(date_portion_str) + # We failed to parse the date, bail out if not date_portion: return meeting_name - rest_of_name = meeting_name - if rest_of_name.startswith(date_portion_str): - rest_of_name = rest_of_name[len(date_portion_str):] - - return datetime.strftime(date_portion, "%Y-%m-%d, %H:%M") + rest_of_name + # Replace the first section with the absolute date + rest_of_name = split_delimiter.join(meeting_name.split(split_delimiter)[1:]) + return datetime.strftime(date_portion, "%Y-%m-%d") + split_delimiter + rest_of_name def _abs_url_from_link(self, link_tag: Tag) -> str: """ @@ -781,17 +790,15 @@ english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[datetime]: """ - Demangle a given date in one of the following formats: + Demangle a given date in one of the following formats (hour/minute part is optional): "Gestern, HH:MM" "Heute, HH:MM" "Morgen, HH:MM" "dd. mon yyyy, HH:MM """ try: + # Normalize whitespace because users date_str = re.sub(r"\s+", " ", date_str) - date_str = re.sub("(Gestern|Yesterday):", "", date_str, re.I) - date_str = re.sub("(Heute|Today):", "", date_str, re.I) - date_str = re.sub("(Morgen|Tomorrow):", "", date_str, re.I) date_str = re.sub("Gestern|Yesterday", _format_date_english(_yesterday()), date_str, re.I) date_str = re.sub("Heute|Today", _format_date_english(date.today()), date_str, re.I) @@ -802,19 +809,28 @@ def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[dateti # Remove trailing dots for abbreviations, e.g. "20. Apr. 2020" -> "20. Apr 2020" date_str = date_str.replace(english + ".", english) - # We now have a nice english String in the format: "dd. 
mmm yyyy, hh:mm" - day_part, time_part = date_str.split(",") + # We now have a nice english String in the format: "dd. mmm yyyy, hh:mm" or "dd. mmm yyyy" + + # Check if we have a time as well + if ", " in date_str: + day_part, time_part = date_str.split(",") + else: + day_part = date_str.split(",")[0] + time_part = None + day_str, month_str, year_str = day_part.split(" ") day = int(day_str.strip().replace(".", "")) month = english_months.index(month_str.strip()) + 1 year = int(year_str.strip()) - hour_str, minute_str = time_part.split(":") - hour = int(hour_str) - minute = int(minute_str) + if time_part: + hour_str, minute_str = time_part.split(":") + hour = int(hour_str) + minute = int(minute_str) + return datetime(year, month, day, hour, minute) - return datetime(year, month, day, hour, minute) + return datetime(year, month, day) except Exception: if not fail_silently: log.warn(f"Date parsing failed for {date_str!r}") From 846c29aee1867f7f0b7efae802af47fee77a3ec6 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 11 May 2022 21:16:09 +0200 Subject: [PATCH 010/147] Download page descriptions --- CHANGELOG.md | 3 + PFERD/crawl/ilias/ilias_html_cleaner.py | 91 ++++++++++++++++++++++ PFERD/crawl/ilias/kit_ilias_html.py | 25 ++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 29 +++++++ 4 files changed, 148 insertions(+) create mode 100644 PFERD/crawl/ilias/ilias_html_cleaner.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e2d3840..b7cad13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Added +- Download of page descriptions + ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler - Mention hrefs in IPD crawler's `--explain` output for users of `link_regex` option diff --git a/PFERD/crawl/ilias/ilias_html_cleaner.py b/PFERD/crawl/ilias/ilias_html_cleaner.py new file mode 100644 index 0000000..5952309 --- /dev/null +++ b/PFERD/crawl/ilias/ilias_html_cleaner.py @@ -0,0 +1,91 @@ +from bs4 import BeautifulSoup, Comment, Tag + +_STYLE_TAG_CONTENT = """ + .ilc_text_block_Information { + background-color: #f5f7fa; + } + div.ilc_text_block_Standard { + margin-bottom: 10px; + margin-top: 10px; + } + span.ilc_text_inline_Strong { + font-weight: bold; + } + + .accordion-head { + background-color: #f5f7fa; + padding: 0.5rem 0; + } + + h3 { + margin-top: 0.5rem; + margin-bottom: 1rem; + } + + br.visible-break { + margin-bottom: 1rem; + } + + article { + margin: 0.5rem 0; + } + + body { + padding: 1em; + grid-template-columns: 1fr min(60rem, 90%) 1fr; + line-height: 1.2; + } +""" + +_ARTICLE_WORTHY_CLASSES = [ + "ilc_text_block_Information", + "ilc_section_Attention", + "ilc_section_Link", +] + + +def insert_base_markup(soup: BeautifulSoup) -> BeautifulSoup: + head = soup.new_tag("head") + soup.insert(0, head) + + simplecss_link: Tag = soup.new_tag("link") + # + simplecss_link["rel"] = "stylesheet" + simplecss_link["href"] = "https://cdn.simplecss.org/simple.css" + head.append(simplecss_link) + + # Basic style tags for compat + style: Tag = soup.new_tag("style") + style.append(_STYLE_TAG_CONTENT) + head.append(style) + + return soup + + +def clean(soup: BeautifulSoup) -> BeautifulSoup: + for block in soup.find_all(class_=lambda x: x in _ARTICLE_WORTHY_CLASSES): + block.name = "article" + + for block in soup.find_all("h3"): + block.name = "div" + + for block in soup.find_all("h1"): + block.name = "h3" + + for block in soup.find_all(class_="ilc_va_ihcap_VAccordIHeadCap"): + block.name = "h3" + block["class"] += 
["accordion-head"] + + for dummy in soup.select(".ilc_text_block_Standard.ilc_Paragraph"): + children = list(dummy.children) + if not children: + dummy.decompose() + if len(children) > 1: + continue + if type(children[0]) == Comment: + dummy.decompose() + + for hrule_imposter in soup.find_all(class_="ilc_section_Separator"): + hrule_imposter.insert(0, soup.new_tag("hr")) + + return soup diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 6d063b6..d58e5c8 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -85,6 +85,31 @@ class IliasPage: log.explain("Page is a normal folder, searching for elements") return self._find_normal_entries() + def get_description(self) -> Optional[BeautifulSoup]: + def is_interesting_class(name: str) -> bool: + return name in ["ilCOPageSection", "ilc_Paragraph", "ilc_va_ihcap_VAccordIHeadCap"] + + paragraphs: List[Tag] = self._soup.findAll(class_=is_interesting_class) + if not paragraphs: + return None + + # Extract bits and pieces into a string and parse it again. + # This ensures we don't miss anything and weird structures are resolved + # somewhat gracefully. 
+ raw_html = "" + for p in paragraphs: + if p.find_parent(class_=is_interesting_class): + continue + + # Ignore special listings (like folder groupings) + if "ilc_section_Special" in p["class"]: + continue + + raw_html += str(p) + "\n" + raw_html = f"\n{raw_html}\n" + + return BeautifulSoup(raw_html, "html.parser") + def get_next_stage_element(self) -> Optional[IliasPageElement]: if self._is_ilias_opencast_embedding(): return self.get_child_elements()[0] diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index ae9ebd4..bbed986 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -17,6 +17,7 @@ from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links +from .ilias_html_cleaner import clean, insert_base_markup from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement TargetType = Union[str, int] @@ -215,6 +216,8 @@ instance's greatest bottleneck. cl = maybe_cl # Not mypy's fault, but explained here: https://github.com/python/mypy/issues/2608 elements: List[IliasPageElement] = [] + # A list as variable redefinitions are not propagated to outer scopes + description: List[BeautifulSoup] = [] @_iorepeat(3, "crawling url") async def gather_elements() -> None: @@ -233,9 +236,15 @@ instance's greatest bottleneck. page = IliasPage(soup, url, None) elements.extend(page.get_child_elements()) + if description_string := page.get_description(): + description.append(description_string) + # Fill up our task list with the found elements await gather_elements() + if description: + await self._download_description(PurePath("."), description[0]) + elements.sort(key=lambda e: e.id()) tasks: List[Awaitable[None]] = [] @@ -265,6 +274,8 @@ instance's greatest bottleneck. 
cl: CrawlToken, ) -> None: elements: List[IliasPageElement] = [] + # A list as variable redefinitions are not propagated to outer scopes + description: List[BeautifulSoup] = [] @_iorepeat(3, "crawling folder") async def gather_elements() -> None: @@ -285,10 +296,15 @@ instance's greatest bottleneck. next_stage_url = None elements.extend(page.get_child_elements()) + if description_string := page.get_description(): + description.append(description_string) # Fill up our task list with the found elements await gather_elements() + if description: + await self._download_description(PurePath("."), description[0]) + elements.sort(key=lambda e: e.id()) tasks: List[Awaitable[None]] = [] @@ -425,6 +441,19 @@ instance's greatest bottleneck. return self._download_booking(element, link_template_maybe, maybe_dl) + @anoncritical + @_iorepeat(1, "downloading description") + async def _download_description(self, parent_path: PurePath, description: BeautifulSoup) -> None: + path = parent_path / "Description.html" + dl = await self.download(path, redownload=Redownload.ALWAYS) + if not dl: + return + + async with dl as (bar, sink): + description = clean(insert_base_markup(description)) + sink.file.write(description.prettify().encode("utf-8")) + sink.done() + @anoncritical @_iorepeat(3, "resolving booking") async def _download_booking( From 46fb782798725b6fde76b71cf7a4d90912ea2c7d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 24 May 2022 23:28:09 +0200 Subject: [PATCH 011/147] Add forum crawling This downloads all forum posts when needed and saves each thread in its own html file, named after the thread title. 
--- CHANGELOG.md | 1 + PFERD/cli/command_kit_ilias_web.py | 7 ++ PFERD/crawl/ilias/kit_ilias_html.py | 90 ++++++++++++++- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 122 ++++++++++++++++++--- PFERD/logging.py | 4 +- 5 files changed, 208 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7cad13..1d70c4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Added - Download of page descriptions +- Forum download support ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler diff --git a/PFERD/cli/command_kit_ilias_web.py b/PFERD/cli/command_kit_ilias_web.py index 12803a6..de74fc3 100644 --- a/PFERD/cli/command_kit_ilias_web.py +++ b/PFERD/cli/command_kit_ilias_web.py @@ -62,6 +62,11 @@ GROUP.add_argument( action=BooleanOptionalAction, help="crawl and download videos" ) +GROUP.add_argument( + "--forums", + action=BooleanOptionalAction, + help="crawl and download forum posts" +) GROUP.add_argument( "--http-timeout", "-t", type=float, @@ -90,6 +95,8 @@ def load( section["link_redirect_delay"] = str(args.link_redirect_delay) if args.videos is not None: section["videos"] = "yes" if args.videos else "no" + if args.forums is not None: + section["forums"] = "yes" if args.forums else "no" if args.http_timeout is not None: section["http_timeout"] = str(args.http_timeout) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d58e5c8..7bab152 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from datetime import date, datetime, timedelta from enum import Enum -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup, Tag @@ -55,6 +55,20 @@ class IliasPageElement: return self.url +@dataclass +class IliasDownloadForumData: + url: str + form_data: Dict[str, 
Union[str, List[str]]] + + +@dataclass +class IliasForumThread: + title: str + title_tag: Tag + content_tag: Tag + mtime: Optional[datetime] + + class IliasPage: def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): @@ -110,13 +124,39 @@ class IliasPage: return BeautifulSoup(raw_html, "html.parser") + def get_download_forum_data(self) -> Optional[IliasDownloadForumData]: + form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x}) + if not form: + return None + post_url = self._abs_url_from_relative(form["action"]) + + form_data: Dict[str, Union[str, List[ſtr]]] = { + "thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})], + "selected_cmd2": "html", + "select_cmd2": "Ausführen", + "selected_cmd": "", + } + + return IliasDownloadForumData(post_url, form_data) + def get_next_stage_element(self) -> Optional[IliasPageElement]: + if self._is_forum_page(): + if "trows=800" in self._page_url: + return None + return self._get_show_max_forum_entries_per_page_url() if self._is_ilias_opencast_embedding(): return self.get_child_elements()[0] if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: return self._find_video_entries_paginated()[0] return None + def _is_forum_page(self) -> bool: + read_more_btn = self._soup.find( + "button", + attrs={"onclick": lambda x: x and "cmdClass=ilobjforumgui&cmd=markAllRead" in x} + ) + return read_more_btn is not None + def _is_video_player(self) -> bool: return "paella_config_file" in str(self._soup) @@ -194,6 +234,19 @@ class IliasPage: return items + def _get_show_max_forum_entries_per_page_url(self) -> Optional[IliasPageElement]: + correct_link = self._soup.find( + "a", + attrs={"href": lambda x: x and "trows=800" in x and "cmd=showThreads" in x} + ) + + if not correct_link: + return None + + link = self._abs_url_from_link(correct_link) + + return IliasPageElement(IliasElementType.FORUM, link, "show all forum 
threads") + def _find_personal_desktop_entries(self) -> List[IliasPageElement]: items: List[IliasPageElement] = [] @@ -877,3 +930,38 @@ def _tomorrow() -> date: def _sanitize_path_name(name: str) -> str: return name.replace("/", "-").replace("\\", "-").strip() + + +def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThread]: + elements = [] + for p in forum_export.select("body > p"): + title_tag = p + content_tag = p.find_next_sibling("ul") + title = p.find("b").text + if ":" in title: + title = title[title.find(":") + 1:] + title = title.strip() + mtime = _guess_timestamp_from_forum_post_content(content_tag) + elements.append(IliasForumThread(title, title_tag, content_tag, mtime)) + + return elements + + +def _guess_timestamp_from_forum_post_content(content: Tag) -> Optional[datetime]: + posts: Optional[Tag] = content.select(".ilFrmPostHeader > span.small") + if not posts: + return None + + newest_date: Optional[datetime] = None + + for post in posts: + text = post.text.strip() + text = text[text.rfind("|") + 1:] + date = demangle_date(text, fail_silently=True) + if not date: + continue + + if not newest_date or newest_date < date: + newest_date = date + + return newest_date diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index bbed986..156cd4c 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -18,7 +18,8 @@ from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadTo from ..http_crawler import HttpCrawler, HttpCrawlerSection from .file_templates import Links from .ilias_html_cleaner import clean, insert_base_markup -from .kit_ilias_html import IliasElementType, IliasPage, IliasPageElement +from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement, + _sanitize_path_name, parse_ilias_forum_export) TargetType = Union[str, int] @@ -67,6 +68,9 @@ class 
KitIliasWebCrawlerSection(HttpCrawlerSection): def videos(self) -> bool: return self.s.getboolean("videos", fallback=False) + def forums(self) -> bool: + return self.s.getboolean("forums", fallback=False) + _DIRECTORY_PAGES: Set[IliasElementType] = set([ IliasElementType.EXERCISE, @@ -183,6 +187,7 @@ instance's greatest bottleneck. self._link_file_redirect_delay = section.link_redirect_delay() self._links = section.links() self._videos = section.videos() + self._forums = section.forums() self._visited_urls: Set[str] = set() async def _run(self) -> None: @@ -335,22 +340,27 @@ instance's greatest bottleneck. element_path = PurePath(parent_path, element.name) if element.type in _VIDEO_ELEMENTS: - log.explain_topic(f"Decision: Crawl video element {fmt_path(element_path)}") if not self._videos: - log.explain("Video crawling is disabled") - log.explain("Answer: no") + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](enable with option 'videos')" + ) return None - else: - log.explain("Video crawling is enabled") - log.explain("Answer: yes") if element.type == IliasElementType.FILE: return await self._handle_file(element, element_path) elif element.type == IliasElementType.FORUM: - log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") - log.explain("Forums are not supported") - log.explain("Answer: No") - return None + if not self._forums: + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](enable with option 'forums')" + ) + return None + return await self._handle_forum(element, element_path) elif element.type == IliasElementType.TEST: log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Tests contain no relevant files") @@ -635,6 +645,68 @@ instance's greatest bottleneck. 
if not await try_stream(): raise CrawlError("File streaming failed after authenticate()") + async def _handle_forum( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Coroutine[Any, Any, None]]: + maybe_cl = await self.crawl(element_path) + if not maybe_cl: + return None + return self._crawl_forum(element, maybe_cl) + + @_iorepeat(3, "crawling forum") + @anoncritical + async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None: + elements = [] + + async with cl: + next_stage_url = element.url + while next_stage_url: + log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {next_stage_url}") + + soup = await self._get_page(next_stage_url) + page = IliasPage(soup, next_stage_url, None) + + if next := page.get_next_stage_element(): + next_stage_url = next.url + else: + break + + download_data = page.get_download_forum_data() + if not download_data: + raise CrawlWarning("Failed to extract forum data") + html = await self._post_authenticated(download_data.url, download_data.form_data) + elements = parse_ilias_forum_export(soupify(html)) + + elements.sort(key=lambda elem: elem.title) + + tasks: List[Awaitable[None]] = [] + for elem in elements: + tasks.append(asyncio.create_task(self._download_forum_thread(cl.path, elem))) + + # And execute them + await self.gather(tasks) + + @anoncritical + @_iorepeat(3, "saving forum thread") + async def _download_forum_thread( + self, + parent_path: PurePath, + element: IliasForumThread, + ) -> None: + path = parent_path / (_sanitize_path_name(element.title) + ".html") + maybe_dl = await self.download(path, mtime=element.mtime) + if not maybe_dl: + return + + async with maybe_dl as (bar, sink): + content = element.title_tag.prettify() + content += element.content_tag.prettify() + sink.file.write(content.encode("utf-8")) + sink.done() + async def _get_page(self, url: str) -> BeautifulSoup: auth_id = await self._current_auth_id() async with 
self.session.get(url) as request: @@ -652,13 +724,37 @@ instance's greatest bottleneck. return soup raise CrawlError("get_page failed even after authenticating") + async def _post_authenticated( + self, + url: str, + data: dict[str, Union[str, List[str]]] + ) -> BeautifulSoup: + auth_id = await self._current_auth_id() + + form_data = aiohttp.FormData() + for key, val in data.items(): + form_data.add_field(key, val) + + async with self.session.post(url, data=form_data(), allow_redirects=False) as request: + if request.status == 200: + return await request.read() + + # We weren't authenticated, so try to do that + await self.authenticate(auth_id) + + # Retry once after authenticating. If this fails, we will die. + async with self.session.post(url, data=data, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + raise CrawlError("post_authenticated failed even after authenticating") + # We repeat this as the login method in shibboleth doesn't handle I/O errors. # Shibboleth is quite reliable as well, the repeat is likely not critical here. - @_iorepeat(3, "Login", failure_is_error=True) + @ _iorepeat(3, "Login", failure_is_error=True) async def _authenticate(self) -> None: await self._shibboleth_login.login(self.session) - @staticmethod + @ staticmethod def _is_logged_in(soup: BeautifulSoup) -> bool: # Normal ILIAS pages mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") diff --git a/PFERD/logging.py b/PFERD/logging.py index e833716..340b21f 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -197,7 +197,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_explain: self.print(f" {escape(text)}") - def status(self, style: str, action: str, text: str) -> None: + def status(self, style: str, action: str, text: str, suffix: str = "") -> None: """ Print a status update while crawling. Allows markup in the "style" argument which will be applied to the "action" string. 
@@ -205,7 +205,7 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_status: action = escape(f"{action:<{self.STATUS_WIDTH}}") - self.print(f"{style}{action}[/] {escape(text)}") + self.print(f"{style}{action}[/] {escape(text)} {suffix}") def report(self, text: str) -> None: """ From ed24366aba7cfb8ca3cdd0df7b2650bc1220437f Mon Sep 17 00:00:00 2001 From: Joscha Date: Sat, 15 Jan 2022 16:23:37 +0100 Subject: [PATCH 012/147] Add pass authenticator --- CHANGELOG.md | 1 + CONFIG.md | 21 ++++++++- PFERD/auth/__init__.py | 3 ++ PFERD/auth/pass_.py | 98 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 PFERD/auth/pass_.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d70c4a..bc9f3e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Added - Download of page descriptions - Forum download support +- `pass` authenticator ### Changed - Add `cpp` extension to default `link_regex` of IPD crawler diff --git a/CONFIG.md b/CONFIG.md index f572a80..0f114ed 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -223,6 +223,23 @@ is stored in the keyring. - `keyring_name`: The service name PFERD uses for storing credentials. (Default: `PFERD`) +### The `pass` authenticator + +This authenticator queries the [`pass` password manager][3] for a username and +password. It tries to be mostly compatible with [browserpass][4] and +[passff][5], so see those links for an overview of the format. If PFERD fails +to load your password, you can use the `--explain` flag to see why. 
+ +- `passname`: The name of the password to use (Required) +- `username_prefixes`: A comma-separated list of username line prefixes + (Default: `login,username,user`) +- `password_prefixes`: A comma-separated list of password line prefixes + (Default: `password,pass,secret`) + +[3]: "Pass: The Standard Unix Password Manager" +[4]: "Organizing password store" +[5]: "Multi-line format" + ### The `tfa` authenticator This authenticator prompts the user on the console for a two-factor @@ -316,7 +333,7 @@ is a regular expression and `TARGET` an f-string based template. If a path matches `SOURCE`, the output path is created using `TARGET` as template. `SOURCE` is automatically anchored. -`TARGET` uses Python's [format string syntax][3]. The *n*-th capturing group can +`TARGET` uses Python's [format string syntax][6]. The *n*-th capturing group can be referred to as `{g}` (e.g. `{g3}`). `{g0}` refers to the original path. If capturing group *n*'s contents are a valid integer, the integer value is available as `{i}` (e.g. `{i3}`). 
If capturing group *n*'s contents are a @@ -337,7 +354,7 @@ Example: `f(oo+)/be?ar -re-> B{g1.upper()}H/fear` - Converts `fooooo/bear` into `BOOOOOH/fear` - Converts `foo/bar/baz` into `BOOH/fear/baz` -[3]: "Format String Syntax" +[6]: "Format String Syntax" ### The `-name-re->` arrow diff --git a/PFERD/auth/__init__.py b/PFERD/auth/__init__.py index 277cade..aa3ba8e 100644 --- a/PFERD/auth/__init__.py +++ b/PFERD/auth/__init__.py @@ -5,6 +5,7 @@ from ..config import Config from .authenticator import Authenticator, AuthError, AuthLoadError, AuthSection # noqa: F401 from .credential_file import CredentialFileAuthenticator, CredentialFileAuthSection from .keyring import KeyringAuthenticator, KeyringAuthSection +from .pass_ import PassAuthenticator, PassAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection from .tfa import TfaAuthenticator @@ -19,6 +20,8 @@ AUTHENTICATORS: Dict[str, AuthConstructor] = { CredentialFileAuthenticator(n, CredentialFileAuthSection(s), c), "keyring": lambda n, s, c: KeyringAuthenticator(n, KeyringAuthSection(s)), + "pass": lambda n, s, c: + PassAuthenticator(n, PassAuthSection(s)), "simple": lambda n, s, c: SimpleAuthenticator(n, SimpleAuthSection(s)), "tfa": lambda n, s, c: diff --git a/PFERD/auth/pass_.py b/PFERD/auth/pass_.py new file mode 100644 index 0000000..4c8e775 --- /dev/null +++ b/PFERD/auth/pass_.py @@ -0,0 +1,98 @@ +import re +import subprocess +from typing import List, Tuple + +from ..logging import log +from .authenticator import Authenticator, AuthError, AuthSection + + +class PassAuthSection(AuthSection): + def passname(self) -> str: + if (value := self.s.get("passname")) is None: + self.missing_value("passname") + return value + + def username_prefixes(self) -> List[str]: + value = self.s.get("username_prefixes", "login,username,user") + return [prefix.lower() for prefix in value.split(",")] + + def password_prefixes(self) -> List[str]: + value = self.s.get("password_prefixes", "password,pass,secret") + 
return [prefix.lower() for prefix in value.split(",")] + + +class PassAuthenticator(Authenticator): + PREFIXED_LINE_RE = r"([a-zA-Z]+):\s?(.*)" # to be used with fullmatch + + def __init__(self, name: str, section: PassAuthSection) -> None: + super().__init__(name) + + self._passname = section.passname() + self._username_prefixes = section.username_prefixes() + self._password_prefixes = section.password_prefixes() + + async def credentials(self) -> Tuple[str, str]: + log.explain_topic("Obtaining credentials from pass") + + try: + log.explain(f"Calling 'pass show {self._passname}'") + result = subprocess.check_output(["pass", "show", self._passname], text=True) + except subprocess.CalledProcessError as e: + raise AuthError(f"Failed to get password info from {self._passname}: {e}") + + prefixed = {} + unprefixed = [] + for line in result.strip().splitlines(): + if match := re.fullmatch(self.PREFIXED_LINE_RE, line): + prefix = match.group(1).lower() + value = match.group(2) + log.explain(f"Found prefixed line {line!r} with prefix {prefix!r}, value {value!r}") + if prefix in prefixed: + raise AuthError(f"Prefix {prefix} specified multiple times") + prefixed[prefix] = value + else: + log.explain(f"Found unprefixed line {line!r}") + unprefixed.append(line) + + username = None + for prefix in self._username_prefixes: + log.explain(f"Looking for username at prefix {prefix!r}") + if prefix in prefixed: + username = prefixed[prefix] + log.explain(f"Found username {username!r}") + break + + password = None + for prefix in self._password_prefixes: + log.explain(f"Looking for password at prefix {prefix!r}") + if prefix in prefixed: + password = prefixed[prefix] + log.explain(f"Found password {password!r}") + break + + if password is None and username is None: + log.explain("No username and password found so far") + log.explain("Using first unprefixed line as password") + log.explain("Using second unprefixed line as username") + elif password is None: + log.explain("No password 
found so far") + log.explain("Using first unprefixed line as password") + elif username is None: + log.explain("No username found so far") + log.explain("Using first unprefixed line as username") + + if password is None: + if not unprefixed: + log.explain("Not enough unprefixed lines left") + raise AuthError("Password could not be determined") + password = unprefixed.pop(0) + log.explain(f"Found password {password!r}") + + if username is None: + if not unprefixed: + log.explain("Not enough unprefixed lines left") + raise AuthError("Username could not be determined") + username = unprefixed.pop(0) + log.explain(f"Found username {username!r}") + + return username, password From 345f52a1f6f55eecf6c31d3cc1a4350c5200087d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:41:29 +0200 Subject: [PATCH 013/147] Detect new login button --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 156cd4c..c99a920 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -759,7 +759,7 @@ instance's greatest bottleneck. 
# Normal ILIAS pages mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar") if mainbar is not None: - login_button = mainbar.find("button", attrs={"data-action": lambda x: x and "login.php" in x}) + login_button = mainbar.find(attrs={"href": lambda x: x and "login.php" in x}) shib_login = soup.find(id="button_shib_login") return not login_button and not shib_login From d9b111cec252f4b1810f06b0f2ca551cb5cdb2a2 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:45:33 +0200 Subject: [PATCH 014/147] Correctly nest description entries --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c99a920..1852c5f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -308,7 +308,7 @@ instance's greatest bottleneck. await gather_elements() if description: - await self._download_description(PurePath("."), description[0]) + await self._download_description(cl.path, description[0]) elements.sort(key=lambda e: e.id()) From aa5a3a10bcbfa0dd54a0dc1a533625f76b2d6ed8 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sun, 14 Aug 2022 21:48:59 +0200 Subject: [PATCH 015/147] Adjust changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc9f3e5..7f35c9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,9 @@ ambiguous situations. 
- IPD crawler crashes on some sites - Meeting name normalization for yesterday, today and tomorrow - Crawling of meeting file previews +- Login with new login button html layout +- Descriptions for courses are now placed in the correct subfolder when + downloading the whole desktop ## 3.4.0 - 2022-05-01 From 66a5b1ba0223848f713192b084f2dcd26a18dbe5 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 17 Aug 2022 13:24:01 +0200 Subject: [PATCH 016/147] Bump version to 3.4.1 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f35c9c..671d48a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.4.1 - 2022-08-17 + ### Added - Download of page descriptions - Forum download support diff --git a/PFERD/version.py b/PFERD/version.py index 8102d37..8832a51 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.0" +VERSION = "3.4.1" From 4a51aaa4f5a1b3382f0bed59f1292fc0952c2832 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 19 Oct 2022 22:59:33 +0200 Subject: [PATCH 017/147] Fix forum crawling crashing for empty threads --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 671d48a..70d2cd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Fixed +- Forum crawling crashing when parsing empty (= 0 messages) threads + ## 3.4.1 - 2022-08-17 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 7bab152..8795512 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -937,6 +937,13 @@ def parse_ilias_forum_export(forum_export: BeautifulSoup) -> List[IliasForumThre for p in forum_export.select("body > p"): title_tag = p content_tag = p.find_next_sibling("ul") + + if not content_tag: + # ILIAS allows users to delete the initial post while keeping the thread open + # This produces empty threads without *any* content. + # I am not sure why you would want this, but ILIAS makes it easy to do. + continue + title = p.find("b").text if ":" in title: title = title[title.find(":") + 1:] From d72fc2760b1dd8243ccf21876bb8cc6e027944bb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 13:09:29 +0200 Subject: [PATCH 018/147] Handle empty forums --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 7 +++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 6 +++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70d2cd5..c7a9899 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. 
### Fixed - Forum crawling crashing when parsing empty (= 0 messages) threads +- Forum crawling crashing when a forum has no threads at all ## 3.4.1 - 2022-08-17 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 8795512..9ea6b9f 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -59,6 +59,7 @@ class IliasPageElement: class IliasDownloadForumData: url: str form_data: Dict[str, Union[str, List[str]]] + empty: bool @dataclass @@ -130,14 +131,16 @@ class IliasPage: return None post_url = self._abs_url_from_relative(form["action"]) + thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})] + form_data: Dict[str, Union[str, List[ſtr]]] = { - "thread_ids[]": [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})], + "thread_ids[]": thread_ids, "selected_cmd2": "html", "select_cmd2": "Ausführen", "selected_cmd": "", } - return IliasDownloadForumData(post_url, form_data) + return IliasDownloadForumData(url=post_url, form_data=form_data, empty=len(thread_ids) == 0) def get_next_stage_element(self) -> Optional[IliasPageElement]: if self._is_forum_page(): diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 1852c5f..f2d5215 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -658,7 +658,7 @@ instance's greatest bottleneck. @_iorepeat(3, "crawling forum") @anoncritical async def _crawl_forum(self, element: IliasPageElement, cl: CrawlToken) -> None: - elements = [] + elements: List[IliasForumThread] = [] async with cl: next_stage_url = element.url @@ -677,6 +677,10 @@ instance's greatest bottleneck. 
download_data = page.get_download_forum_data() if not download_data: raise CrawlWarning("Failed to extract forum data") + if download_data.empty: + log.explain("Forum had no threads") + elements = [] + return html = await self._post_authenticated(download_data.url, download_data.form_data) elements = parse_ilias_forum_export(soupify(html)) From fb4631ba180a9ff0303d59e798d4bccfa0253666 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 13:13:36 +0200 Subject: [PATCH 019/147] Fix ilias background login --- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 35 ++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index f2d5215..10a270f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -23,6 +23,12 @@ from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, Ilia TargetType = Union[str, int] +_ILIAS_URL = "https://ilias.studium.kit.edu" + + +class KitShibbolethBackgroundLoginSuccessful(): + pass + class KitIliasWebCrawlerSection(HttpCrawlerSection): def target(self) -> TargetType: @@ -36,7 +42,7 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection): if target == "desktop": # Full personal desktop return target - if target.startswith("https://ilias.studium.kit.edu"): + if target.startswith(_ILIAS_URL): # ILIAS URL return target @@ -181,7 +187,7 @@ instance's greatest bottleneck. 
section.tfa_auth(authenticators), ) - self._base_url = "https://ilias.studium.kit.edu" + self._base_url = _ILIAS_URL self._target = section.target() self._link_file_redirect_delay = section.link_redirect_delay() @@ -808,14 +814,17 @@ class KitShibbolethLogin: # Equivalent: Click on "Mit KIT-Account anmelden" button in # https://ilias.studium.kit.edu/login.php - url = "https://ilias.studium.kit.edu/shib_login.php" + url = f"{_ILIAS_URL}/shib_login.php" data = { "sendLogin": "1", "idp_selection": "https://idp.scc.kit.edu/idp/shibboleth", "il_target": "", "home_organization_selection": "Weiter", } - soup: BeautifulSoup = await _shib_post(sess, url, data) + soup: Union[BeautifulSoup, KitShibbolethBackgroundLoginSuccessful] = await _shib_post(sess, url, data) + + if isinstance(soup, KitShibbolethBackgroundLoginSuccessful): + return # Attempt to login using credentials, if necessary while not self._login_successful(soup): @@ -854,7 +863,7 @@ class KitShibbolethLogin: # (or clicking "Continue" if you have JS disabled) relay_state = soup.find("input", {"name": "RelayState"}) saml_response = soup.find("input", {"name": "SAMLResponse"}) - url = "https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST" + url = f"{_ILIAS_URL}/Shibboleth.sso/SAML2/POST" data = { # using the info obtained in the while loop above "RelayState": relay_state["value"], "SAMLResponse": saml_response["value"], @@ -903,22 +912,35 @@ async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> Beautifu return soupify(await response.read()) -async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: +async def _shib_post( + session: aiohttp.ClientSession, + url: str, + data: Any +) -> Union[BeautifulSoup, KitShibbolethBackgroundLoginSuccessful]: """ aiohttp unescapes '/' and ':' in URL query parameters which is not RFC compliant and rejected by Shibboleth. Thanks a lot. 
So now we unroll the requests manually, parse location headers and build encoded URL objects ourselves... Who thought mangling location header was a good idea?? """ + log.explain_topic("Shib login POST") async with session.post(url, data=data, allow_redirects=False) as response: location = response.headers.get("location") + log.explain(f"Got location {location!r}") if not location: raise CrawlWarning(f"Login failed (1), no location header present at {url}") correct_url = yarl.URL(location, encoded=True) + log.explain(f"Corrected location to {correct_url!r}") + + if str(correct_url).startswith(_ILIAS_URL): + log.explain("ILIAS recognized our shib token and logged us in in the background, returning") + return KitShibbolethBackgroundLoginSuccessful() async with session.get(correct_url, allow_redirects=False) as response: location = response.headers.get("location") + log.explain(f"Redirected to {location!r} with status {response.status}") # If shib still still has a valid session, it will directly respond to the request if location is None: + log.explain("Shib recognized us, returning its response directly") return soupify(await response.read()) as_yarl = yarl.URL(response.url) @@ -932,6 +954,7 @@ async def _shib_post(session: aiohttp.ClientSession, url: str, data: Any) -> Bea path=location, encoded=True ) + log.explain(f"Corrected location to {correct_url!r}") async with session.get(correct_url, allow_redirects=False) as response: return soupify(await response.read()) From 5fdd40204b156b15c008ec1dee05e168672fe243 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 14:33:58 +0200 Subject: [PATCH 020/147] Unwrap future meetings when ILIAS hides them behind a pagination --- PFERD/crawl/ilias/kit_ilias_html.py | 20 +++++++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 29 ++++++++++++++-------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 
9ea6b9f..2f0011e 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -146,11 +146,17 @@ class IliasPage: if self._is_forum_page(): if "trows=800" in self._page_url: return None + log.explain("Requesting *all* forum threads") return self._get_show_max_forum_entries_per_page_url() if self._is_ilias_opencast_embedding(): + log.explain("Unwrapping opencast embedding") return self.get_child_elements()[0] if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: + log.explain("Unwrapping video pagination") return self._find_video_entries_paginated()[0] + if self._contains_collapsed_future_meetings(): + log.explain("Requesting *all* future meetings") + return self._uncollapse_future_meetings_url() return None def _is_forum_page(self) -> bool: @@ -203,6 +209,16 @@ class IliasPage: return False return "target=copa_" in link.get("value") + def _contains_collapsed_future_meetings(self) -> bool: + return self._uncollapse_future_meetings_url() is not None + + def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]: + element = self._soup.find("a", attrs={"href": lambda x: x and "crs_next_sess=1" in x}) + if not element: + return None + link = self._abs_url_from_link(element) + return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings") + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. 
The actual video stream url is nowhere @@ -793,6 +809,10 @@ class IliasPage: if img_tag is None: img_tag = found_parent.select_one("img.icon") + if img_tag is None and found_parent.find("a", attrs={"href": lambda x: x and "crs_next_sess=" in x}): + log.explain("Found session expansion button, skipping it as it has no content") + return None + if img_tag is None: _unexpected_html_warning() log.warn_contd(f"Tried to figure out element type, but did not find an image for {url}") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 10a270f..bc0d816 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -234,19 +234,28 @@ instance's greatest bottleneck. async def gather_elements() -> None: elements.clear() async with cl: - soup = await self._get_page(url) - - if expected_id is not None: - perma_link_element: Tag = soup.find(id="current_perma_link") - if not perma_link_element or "crs_" not in perma_link_element.get("value"): - raise CrawlError("Invalid course id? Didn't find anything looking like a course") + next_stage_url: Optional[str] = url + current_parent = None # Duplicated code, but the root page is special - we want to avoid fetching it twice! - log.explain_topic("Parsing root HTML page") - log.explain(f"URL: {url}") - page = IliasPage(soup, url, None) - elements.extend(page.get_child_elements()) + while next_stage_url: + soup = await self._get_page(next_stage_url) + if current_parent is None and expected_id is not None: + perma_link_element: Tag = soup.find(id="current_perma_link") + if not perma_link_element or "crs_" not in perma_link_element.get("value"): + raise CrawlError("Invalid course id? 
Didn't find anything looking like a course") + + log.explain_topic(f"Parsing HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {next_stage_url}") + page = IliasPage(soup, next_stage_url, current_parent) + if next_element := page.get_next_stage_element(): + current_parent = next_element + next_stage_url = next_element.url + else: + next_stage_url = None + + elements.extend(page.get_child_elements()) if description_string := page.get_description(): description.append(description_string) From e1430e629844ad122a78d18197ed54100c734bbb Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 18:36:34 +0200 Subject: [PATCH 021/147] Handle (and ignore) surveys --- PFERD/crawl/ilias/kit_ilias_html.py | 3 +++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 2f0011e..d969577 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -24,6 +24,7 @@ class IliasElementType(Enum): LINK = "link" BOOKING = "booking" MEETING = "meeting" + SURVEY = "survey" VIDEO = "video" VIDEO_PLAYER = "video_player" VIDEO_FOLDER = "video_folder" @@ -730,6 +731,8 @@ class IliasPage: return IliasElementType.TEST if "fold" in icon["class"]: return IliasElementType.FOLDER + if "svy" in icon["class"]: + return IliasElementType.SURVEY _unexpected_html_warning() log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index bc0d816..5ff8212 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -380,6 +380,13 @@ instance's greatest bottleneck. 
log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") log.explain("Tests contain no relevant files") log.explain("Answer: No") + elif element.type == IliasElementType.SURVEY: + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](surveys contain no relevant data)" + ) return None elif element.type == IliasElementType.LINK: return await self._handle_link(element, element_path) From 1b6be6bd79112faea6e56c43f4756dde10ba00ba Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Mon, 24 Oct 2022 18:36:54 +0200 Subject: [PATCH 022/147] Handle content pages in cards --- PFERD/crawl/ilias/kit_ilias_html.py | 2 ++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d969577..ee0364a 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -731,6 +731,8 @@ class IliasPage: return IliasElementType.TEST if "fold" in icon["class"]: return IliasElementType.FOLDER + if "copa" in icon["class"]: + return IliasElementType.FOLDER if "svy" in icon["class"]: return IliasElementType.SURVEY diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 5ff8212..9295e93 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -377,9 +377,13 @@ instance's greatest bottleneck. 
return None return await self._handle_forum(element, element_path) elif element.type == IliasElementType.TEST: - log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}") - log.explain("Tests contain no relevant files") - log.explain("Answer: No") + log.status( + "[bold bright_black]", + "Ignored", + fmt_path(element_path), + "[bright_black](tests contain no relevant data)" + ) + return None elif element.type == IliasElementType.SURVEY: log.status( "[bold bright_black]", From f47d2f11d843bfd3307815b231dd3e3df0265cef Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 25 Oct 2022 20:28:06 +0200 Subject: [PATCH 023/147] Append trailing slash to kit-ipd links to ensure urljoin works as expected --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7a9899..24d9fa6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Fixed - Forum crawling crashing when parsing empty (= 0 messages) threads - Forum crawling crashing when a forum has no threads at all +- kit-ipd crawler if URL did not end with a trailing slash ## 3.4.1 - 2022-08-17 diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index d9fac32..338e059 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -24,6 +24,9 @@ class KitIpdCrawlerSection(HttpCrawlerSection): if not target.startswith("https://"): self.invalid_value("target", target, "Should be a URL") + if not target.endswith("/"): + target = target + "/" + return target def link_regex(self) -> Pattern[str]: From 37b51a66d87d368afc3bef2b81edf1629f95cd57 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 Oct 2022 18:22:37 +0200 Subject: [PATCH 024/147] Update changelog --- CHANGELOG.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24d9fa6..2bb0231 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,10 +22,16 @@ 
ambiguous situations. ## Unreleased +### Added +- Recognize and crawl content pages in cards +- Recognize and ignore surveys + ### Fixed -- Forum crawling crashing when parsing empty (= 0 messages) threads +- Forum crawling crashing when a thread has no messages at all - Forum crawling crashing when a forum has no threads at all -- kit-ipd crawler if URL did not end with a trailing slash +- Ilias login failing in some cases +- Crawling of paginated future meetings +- IPD crawler handling of URLs without trailing slash ## 3.4.1 - 2022-08-17 From 259cfc20cccae68a2f34984796405a35a7f31707 Mon Sep 17 00:00:00 2001 From: Joscha Date: Wed, 26 Oct 2022 18:26:17 +0200 Subject: [PATCH 025/147] Bump version to 3.4.2 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bb0231..9ecddf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. ## Unreleased +## 3.4.2 - 2022-10-26 + ### Added - Recognize and crawl content pages in cards - Recognize and ignore surveys diff --git a/PFERD/version.py b/PFERD/version.py index 8832a51..0ef5d89 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.1" +VERSION = "3.4.2" From c020cccc64f152882688b119416f0582ec94e074 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Oct 2022 14:08:29 +0200 Subject: [PATCH 026/147] Include found paths in "second path found" warning --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 2 +- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 8 +++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ecddf7..3dd25b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. 
## Unreleased +### Changed +- Clear up error message shown when multiple paths are found to an element + ## 3.4.2 - 2022-10-26 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index ee0364a..56dcf7b 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -134,7 +134,7 @@ class IliasPage: thread_ids = [f["value"] for f in form.find_all(attrs={"name": "thread_ids[]"})] - form_data: Dict[str, Union[str, List[ſtr]]] = { + form_data: Dict[str, Union[str, List[str]]] = { "thread_ids[]": thread_ids, "selected_cmd2": "html", "select_cmd2": "Ausführen", diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index 9295e93..e3719b8 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -194,7 +194,7 @@ instance's greatest bottleneck. self._links = section.links() self._videos = section.videos() self._forums = section.forums() - self._visited_urls: Set[str] = set() + self._visited_urls: Dict[str, PurePath] = dict() async def _run(self) -> None: if isinstance(self._target, int): @@ -348,9 +348,11 @@ instance's greatest bottleneck. ) -> Optional[Coroutine[Any, Any, None]]: if element.url in self._visited_urls: raise CrawlWarning( - f"Found second path to element {element.name!r} at {element.url!r}. Aborting subpath" + f"Found second path to element {element.name!r} at {element.url!r}. " + + f"First path: {fmt_path(self._visited_urls[element.url])}. " + + f"Second path: {fmt_path(parent_path)}." 
) - self._visited_urls.add(element.url) + self._visited_urls[element.url] = parent_path element_path = PurePath(parent_path, element.name) From 07200bbde5fb72f2f846101b92b440724c8c7959 Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 31 Oct 2022 14:10:45 +0100 Subject: [PATCH 027/147] Document ilias web crawler's forums option --- CHANGELOG.md | 3 +++ CONFIG.md | 1 + 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dd25b8..e5e81d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Added +- Missing documentation for `forums` option + ### Changed - Clear up error message shown when multiple paths are found to an element diff --git a/CONFIG.md b/CONFIG.md index 0f114ed..1ca43c4 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -181,6 +181,7 @@ script once per day should be fine. redirect to the actual URL. Set to a negative value to disable the automatic redirect. (Default: `-1`) - `videos`: Whether to download videos. (Default: `no`) +- `forums`: Whether to download forum threads. (Default: `no`) - `http_timeout`: The timeout (in seconds) for all HTTP requests. (Default: `20.0`) From e69b55b3496d58bc19d76429ca0078ab10f23074 Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Fri, 4 Nov 2022 12:18:26 +0100 Subject: [PATCH 028/147] Add more unofficial package managers (#66) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index ce917b0..31a3475 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,10 @@ The use of [venv](https://docs.python.org/3/library/venv.html) is recommended. 
Unofficial packages are available for: - [AUR](https://aur.archlinux.org/packages/pferd) +- [brew](https://formulae.brew.sh/formula/pferd) +- [conda-forge](https://github.com/conda-forge/pferd-feedstock) - [nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/pferd/default.nix) +- [PyPi](https://pypi.org/project/pferd) See also PFERD's [repology page](https://repology.org/project/pferd/versions). From 635caa765decd9a747d8b313252fd6b56cea0951 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 15 Nov 2022 17:17:55 +0100 Subject: [PATCH 029/147] Fix typo Thanks, burg113 --- CONFIG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONFIG.md b/CONFIG.md index 1ca43c4..640e4af 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -290,7 +290,7 @@ path matches `SOURCE`, it is renamed to `TARGET`. Example: `foo/bar --> baz` - Doesn't match `foo`, `a/foo/bar` or `foo/baz` - Converts `foo/bar` into `baz` -- Converts `foo/bar/wargl` into `bar/wargl` +- Converts `foo/bar/wargl` into `baz/wargl` Example: `foo/bar --> !` - Doesn't match `foo`, `a/foo/bar` or `foo/baz` From c0d6d8b22975234b0c9141a22307c8036698566c Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 21 Nov 2022 17:53:30 +0100 Subject: [PATCH 030/147] Use url after redirect for relative links --- CHANGELOG.md | 3 +++ PFERD/crawl/kit_ipd_crawler.py | 27 ++++++++++++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5e81d6..5bbefd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,9 @@ ambiguous situations. 
### Changed - Clear up error message shown when multiple paths are found to an element +### Fixed +- IPD crawler unnecessarily appending trailing slashes + ## 3.4.2 - 2022-10-26 ### Added diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 338e059..c852be0 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -2,7 +2,7 @@ import os import re from dataclasses import dataclass from pathlib import PurePath -from typing import Awaitable, List, Optional, Pattern, Set, Union +from typing import Awaitable, List, Optional, Pattern, Set, Tuple, Union from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag @@ -24,9 +24,6 @@ class KitIpdCrawlerSection(HttpCrawlerSection): if not target.startswith("https://"): self.invalid_value("target", target, "Should be a URL") - if not target.endswith("/"): - target = target + "/" - return target def link_regex(self) -> Pattern[str]: @@ -102,32 +99,32 @@ class KitIpdCrawler(HttpCrawler): await self._stream_from_url(file.url, sink, bar) async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]: - page = await self.get_page() + page, url = await self.get_page() elements: List[Tag] = self._find_file_links(page) items: Set[Union[KitIpdFile, KitIpdFolder]] = set() for element in elements: folder_label = self._find_folder_label(element) if folder_label: - folder = self._extract_folder(folder_label) + folder = self._extract_folder(folder_label, url) if folder not in items: items.add(folder) folder.explain() else: - file = self._extract_file(element) + file = self._extract_file(element, url) items.add(file) log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") log.explain("Attributing it to root folder") return items - def _extract_folder(self, folder_tag: Tag) -> KitIpdFolder: + def _extract_folder(self, folder_tag: Tag, url: str) -> KitIpdFolder: files: List[KitIpdFile] = [] name = folder_tag.getText().strip() container: Tag = 
folder_tag.findNextSibling(name="table") for link in self._find_file_links(container): - files.append(self._extract_file(link)) + files.append(self._extract_file(link, url)) return KitIpdFolder(name, files) @@ -138,16 +135,16 @@ class KitIpdCrawler(HttpCrawler): return None return enclosing_table.findPreviousSibling(name=re.compile("^h[1-6]$")) - def _extract_file(self, link: Tag) -> KitIpdFile: - url = self._abs_url_from_link(link) + def _extract_file(self, link: Tag, url: str) -> KitIpdFile: + url = self._abs_url_from_link(url, link) name = os.path.basename(url) return KitIpdFile(name, url) def _find_file_links(self, tag: Union[Tag, BeautifulSoup]) -> List[Tag]: return tag.findAll(name="a", attrs={"href": self._file_regex}) - def _abs_url_from_link(self, link_tag: Tag) -> str: - return urljoin(self._url, link_tag.get("href")) + def _abs_url_from_link(self, url: str, link_tag: Tag) -> str: + return urljoin(url, link_tag.get("href")) async def _stream_from_url(self, url: str, sink: FileSink, bar: ProgressBar) -> None: async with self.session.get(url, allow_redirects=False) as resp: @@ -162,7 +159,7 @@ class KitIpdCrawler(HttpCrawler): sink.done() - async def get_page(self) -> BeautifulSoup: + async def get_page(self) -> Tuple[BeautifulSoup, str]: async with self.session.get(self._url) as request: # The web page for Algorithmen für Routenplanung contains some # weird comments that beautifulsoup doesn't parse correctly. This @@ -170,4 +167,4 @@ class KitIpdCrawler(HttpCrawler): # cause issues on other pages. 
content = (await request.read()).decode("utf-8") content = re.sub(r"<!--.*?-->", "", content) - return soupify(content.encode("utf-8")) + return soupify(content.encode("utf-8")), str(request.url) From 55a2de6b88bbd2ee0cb031271e7045f53caa1702 Mon Sep 17 00:00:00 2001 From: c0derMo Date: Fri, 25 Nov 2022 10:25:22 +0000 Subject: [PATCH 031/147] Fix crawling English opencast --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bbefd4..1dc5abc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ ambiguous situations. ### Fixed - IPD crawler unnecessarily appending trailing slashes +- Crawling opencast when ILIAS is set to English ## 3.4.2 - 2022-10-26 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 56dcf7b..c0ebdc9 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -366,7 +366,7 @@ class IliasPage: """ # Video start links are marked with an "Abspielen" link video_links: List[Tag] = self._soup.findAll( - name="a", text=re.compile(r"\s*Abspielen\s*") + name="a", text=re.compile(r"\s*(Abspielen|Play)\s*") ) results: List[IliasPageElement] = [] From 6d44aac2783c69031e7686263fc0a2285912376f Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 29 Nov 2022 18:22:19 +0100 Subject: [PATCH 032/147] Bump version to 3.4.3 --- CHANGELOG.md | 2 ++ PFERD/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dc5abc..8793d43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ ambiguous situations. 
## Unreleased +## 3.4.3 - 2022-11-29 + ### Added - Missing documentation for `forums` option diff --git a/PFERD/version.py b/PFERD/version.py index 0ef5d89..7043d78 100644 --- a/PFERD/version.py +++ b/PFERD/version.py @@ -1,2 +1,2 @@ NAME = "PFERD" -VERSION = "3.4.2" +VERSION = "3.4.3" From 722d2eb393913e770aff17da6b5b3b6603d1ee67 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 25 Nov 2022 12:49:36 +0100 Subject: [PATCH 033/147] Fix crawling of courses with preselected timeline tab --- CHANGELOG.md | 3 +++ PFERD/crawl/ilias/kit_ilias_html.py | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8793d43..b1d18cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ambiguous situations. ## Unreleased +### Fixed +- Crawling of courses with the timeline view as the default tab + ## 3.4.3 - 2022-11-29 ### Added diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index c0ebdc9..44e44d9 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -158,6 +158,8 @@ class IliasPage: if self._contains_collapsed_future_meetings(): log.explain("Requesting *all* future meetings") return self._uncollapse_future_meetings_url() + if not self._is_content_tab_selected(): + return self._select_content_page_url() return None def _is_forum_page(self) -> bool: @@ -220,6 +222,27 @@ class IliasPage: link = self._abs_url_from_link(element) return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings") + def _is_content_tab_selected(self) -> bool: + return self._select_content_page_url() is None + + def _select_content_page_url(self) -> Optional[IliasPageElement]: + tab = self._soup.find( + id="tab_view_content", + attrs={"class": lambda x: x is not None and "active" not in x} + ) + # Already selected (or not found) + if not tab: + return None + link = tab.find("a") + if link: + link = self._abs_url_from_link(link) + return 
IliasPageElement(IliasElementType.FOLDER, link, "select content page") + + _unexpected_html_warning() + log.warn_contd(f"Could not find content tab URL on {self._page_url!r}.") + log.warn_contd("PFERD might not find content on the course's main page.") + return None + def _player_to_video(self) -> List[IliasPageElement]: # Fetch the actual video page. This is a small wrapper page initializing a javscript # player. Sadly we can not execute that JS. The actual video stream url is nowhere From 467fc526e8411d4a5113dbb78747aa119981c476 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 21 Mar 2023 23:52:24 +0100 Subject: [PATCH 034/147] Fix crawling of file/video cards --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1d18cd..c27059b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ ambiguous situations. ### Fixed - Crawling of courses with the timeline view as the default tab +- Crawling of file and custom opencast cards ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 44e44d9..079cfd6 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -738,7 +738,7 @@ class IliasPage: icon: Tag = card_root.select_one(".il-card-repository-head .icon") - if "opencast" in icon["class"]: + if "opencast" in icon["class"] or "xoct" in icon["class"]: return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED if "exc" in icon["class"]: return IliasElementType.EXERCISE @@ -758,6 +758,8 @@ class IliasPage: return IliasElementType.FOLDER if "svy" in icon["class"]: return IliasElementType.SURVEY + if "file" in icon["class"]: + return IliasElementType.FILE _unexpected_html_warning() log.warn_contd(f"Could not extract type from {icon} for card title {card_title}") From 6f30c6583d6512c92042c581e86027a4341ddc89 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Tue, 21 
Mar 2023 23:52:33 +0100 Subject: [PATCH 035/147] Fix crawling of cards without descriptions --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c27059b..7a5f654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ ambiguous situations. ### Fixed - Crawling of courses with the timeline view as the default tab - Crawling of file and custom opencast cards +- Crawling of button cards without descriptions ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 079cfd6..efe6757 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -708,7 +708,11 @@ class IliasPage: "div", attrs={"class": lambda x: x and "caption" in x}, ) - description = caption_parent.find_next_sibling("div").getText().strip() + caption_container = caption_parent.find_next_sibling("div") + if caption_container: + description = caption_container.getText().strip() + else: + description = None if not type: _unexpected_html_warning() From 0294ceb7d5ff074dcc2566872d6b5f64f99c598f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Wed, 22 Mar 2023 00:08:19 +0100 Subject: [PATCH 036/147] Update github action versions --- .github/workflows/build-and-release.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index 090ac7e..83a36e4 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -17,9 +17,9 @@ jobs: python: ["3.9"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} @@ -45,7 +45,7 @@ jobs: run: mv dist/pferd* dist/pferd-${{ matrix.os }} - name: Upload binary - uses: actions/upload-artifact@v2 + uses: 
actions/upload-artifact@v3 with: name: Binaries path: dist/pferd-${{ matrix.os }} @@ -57,7 +57,7 @@ jobs: steps: - name: Download binaries - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: Binaries From 443f7fe83913bcb82a42d7b70d4d05df65f05278 Mon Sep 17 00:00:00 2001 From: "Mr. Pine" Date: Sat, 29 Jul 2023 17:54:42 +0200 Subject: [PATCH 037/147] Add `no-delete-prompt-overwrite` crawler conflict resolution option (#75) --- CHANGELOG.md | 3 +++ CONFIG.md | 2 ++ LICENSE | 3 ++- PFERD/output_dir.py | 11 ++++++----- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a5f654..22522e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,9 @@ ambiguous situations. - Crawling of file and custom opencast cards - Crawling of button cards without descriptions +### Added +- `no-delete-prompt-override` conflict resolution strategy + ## 3.4.3 - 2022-11-29 ### Added diff --git a/CONFIG.md b/CONFIG.md index 640e4af..84ee885 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -75,6 +75,8 @@ common to all crawlers: using `prompt` and always choosing "yes". - `no-delete`: Never delete local files, but overwrite local files if the remote file is different. + - `no-delete-prompt-overwrite`: Never delete local files, but prompt to overwrite local files if the + remote file is different. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). 
(Default: empty) - `tasks`: The maximum number of concurrent tasks (such as crawling or diff --git a/LICENSE b/LICENSE index fe2293f..d81e827 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,6 @@ Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, - TheChristophe, Scriptim, thelukasprobst, Toorero + TheChristophe, Scriptim, thelukasprobst, Toorero, + Mr-Pine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index c92f4a6..38d1288 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -44,6 +44,7 @@ class OnConflict(Enum): LOCAL_FIRST = "local-first" REMOTE_FIRST = "remote-first" NO_DELETE = "no-delete" + NO_DELETE_PROMPT_OVERWRITE = "no-delete-prompt-overwrite" @staticmethod def from_string(string: str) -> "OnConflict": @@ -51,7 +52,7 @@ class OnConflict(Enum): return OnConflict(string) except ValueError: raise ValueError("must be one of 'prompt', 'local-first'," - " 'remote-first', 'no-delete'") + " 'remote-first', 'no-delete', 'no-delete-prompt-overwrite'") @dataclass @@ -264,7 +265,7 @@ class OutputDirectory: on_conflict: OnConflict, path: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Replace {fmt_path(path)} with remote file?" return await prompt_yes_no(prompt, default=False) @@ -283,7 +284,7 @@ class OutputDirectory: on_conflict: OnConflict, path: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?" 
return await prompt_yes_no(prompt, default=False) @@ -303,7 +304,7 @@ class OutputDirectory: path: PurePath, parent: PurePath, ) -> bool: - if on_conflict == OnConflict.PROMPT: + if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: async with log.exclusive_output(): prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?" return await prompt_yes_no(prompt, default=False) @@ -330,7 +331,7 @@ class OutputDirectory: return False elif on_conflict == OnConflict.REMOTE_FIRST: return True - elif on_conflict == OnConflict.NO_DELETE: + elif on_conflict in {OnConflict.NO_DELETE, OnConflict.NO_DELETE_PROMPT_OVERWRITE}: return False # This should never be reached From d204dac8ced63534ca2b4596e9a63c880b2077a3 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Fri, 2 Jun 2023 18:19:39 +0200 Subject: [PATCH 038/147] Detect unexpected root page redirects and abort operation --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 10 ++++++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 20 ++++++++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22522e2..ee55659 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ ambiguous situations. 
- Crawling of courses with the timeline view as the default tab - Crawling of file and custom opencast cards - Crawling of button cards without descriptions +- Abort crawling when encountering an unexpected ilias root page redirect ### Added - `no-delete-prompt-override` conflict resolution strategy diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index efe6757..aed2069 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -79,6 +79,16 @@ class IliasPage: self._page_type = source_element.type if source_element else None self._source_name = source_element.name if source_element else "" + @staticmethod + def is_root_page(soup: BeautifulSoup) -> bool: + permalink = soup.find(id="current_perma_link") + if permalink is None: + return False + value = permalink.attrs.get("value") + if value is None: + return False + return "goto.php?target=root_" in value + def get_child_elements(self) -> List[IliasPageElement]: """ Return all child page elements you can find here. diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index e3719b8..ae49edc 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -239,7 +239,7 @@ instance's greatest bottleneck. # Duplicated code, but the root page is special - we want to avoid fetching it twice! while next_stage_url: - soup = await self._get_page(next_stage_url) + soup = await self._get_page(next_stage_url, root_page_allowed=True) if current_parent is None and expected_id is not None: perma_link_element: Tag = soup.find(id="current_perma_link") @@ -739,12 +739,12 @@ instance's greatest bottleneck. 
sink.file.write(content.encode("utf-8")) sink.done() - async def _get_page(self, url: str) -> BeautifulSoup: + async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup: auth_id = await self._current_auth_id() async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): - return soup + return self._verify_page(soup, url, root_page_allowed) # We weren't authenticated, so try to do that await self.authenticate(auth_id) @@ -753,9 +753,21 @@ instance's greatest bottleneck. async with self.session.get(url) as request: soup = soupify(await request.read()) if self._is_logged_in(soup): - return soup + return self._verify_page(soup, url, root_page_allowed) raise CrawlError("get_page failed even after authenticating") + def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup: + if IliasPage.is_root_page(soup) and not root_page_allowed: + raise CrawlError( + "Unexpectedly encountered ILIAS root page. " + "This usually happens because the ILIAS instance is broken. " + "If so, wait a day or two and try again. " + "It could also happen because a crawled element links to the ILIAS root page. " + "If so, use a transform with a ! as target to ignore the particular element. 
" + f"The redirect came from {url}" + ) + return soup + async def _post_authenticated( self, url: str, From 123a57beec37090310f76df3746e6ce107ceb299 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Jul 2023 18:14:57 +0200 Subject: [PATCH 039/147] Fix mypy unreachable error in file_templates --- PFERD/crawl/ilias/file_templates.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 151a41b..59123a2 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -102,24 +102,24 @@ class Links(Enum): INTERNET_SHORTCUT = "internet-shortcut" def template(self) -> Optional[str]: - if self == self.FANCY: + if self == Links.FANCY: return _link_template_fancy - elif self == self.PLAINTEXT: + elif self == Links.PLAINTEXT: return _link_template_plain - elif self == self.INTERNET_SHORTCUT: + elif self == Links.INTERNET_SHORTCUT: return _link_template_internet_shortcut - elif self == self.IGNORE: + elif self == Links.IGNORE: return None raise ValueError("Missing switch case") def extension(self) -> Optional[str]: - if self == self.FANCY: + if self == Links.FANCY: return ".html" - elif self == self.PLAINTEXT: + elif self == Links.PLAINTEXT: return ".txt" - elif self == self.INTERNET_SHORTCUT: + elif self == Links.INTERNET_SHORTCUT: return ".url" - elif self == self.IGNORE: + elif self == Links.IGNORE: return None raise ValueError("Missing switch case") From 68c398f1fea5cfefd86d11e79f2f6582d50e6563 Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 29 Jul 2023 23:23:10 +0200 Subject: [PATCH 040/147] Add support for ILIAS learning modules --- CHANGELOG.md | 1 + PFERD/crawl/ilias/file_templates.py | 69 +++++++++ PFERD/crawl/ilias/ilias_html_cleaner.py | 2 +- PFERD/crawl/ilias/kit_ilias_html.py | 46 ++++++ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 160 ++++++++++++++++++++- 5 files changed, 272 insertions(+), 6 deletions(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index ee55659..6e3925c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ ambiguous situations. ### Added - `no-delete-prompt-override` conflict resolution strategy +- support for ILIAS learning modules ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/file_templates.py b/PFERD/crawl/ilias/file_templates.py index 59123a2..b206461 100644 --- a/PFERD/crawl/ilias/file_templates.py +++ b/PFERD/crawl/ilias/file_templates.py @@ -1,6 +1,10 @@ from enum import Enum from typing import Optional +import bs4 + +from PFERD.utils import soupify + _link_template_plain = "{{link}}" _link_template_fancy = """ @@ -94,6 +98,71 @@ _link_template_internet_shortcut = """ URL={{link}} """.strip() +_learning_module_template = """ + + + + + {{name}} + + + + +{{body}} + + +""" + + +def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str: + # Seems to be comments, ignore those. + for elem in body.select(".il-copg-mob-fullscreen-modal"): + elem.decompose() + + nav_template = """ + + """ + if prev and body.select_one(".ilc_page_lnav_LeftNavigation"): + text = body.select_one(".ilc_page_lnav_LeftNavigation").getText().strip() + left = f'{text}' + else: + left = "" + + if next and body.select_one(".ilc_page_rnav_RightNavigation"): + text = body.select_one(".ilc_page_rnav_RightNavigation").getText().strip() + right = f'{text}' + else: + right = "" + + if top_nav := body.select_one(".ilc_page_tnav_TopNavigation"): + top_nav.replace_with( + soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode()) + ) + + if bot_nav := body.select_one(".ilc_page_bnav_BottomNavigation"): + bot_nav.replace_with(soupify(nav_template.replace( + "{{left}}", left).replace("{{right}}", right).encode()) + ) + + body = body.prettify() + return _learning_module_template.replace("{{body}}", body).replace("{{name}}", name) + class Links(Enum): IGNORE = "ignore" diff --git 
a/PFERD/crawl/ilias/ilias_html_cleaner.py b/PFERD/crawl/ilias/ilias_html_cleaner.py index 5952309..5495304 100644 --- a/PFERD/crawl/ilias/ilias_html_cleaner.py +++ b/PFERD/crawl/ilias/ilias_html_cleaner.py @@ -82,7 +82,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup: dummy.decompose() if len(children) > 1: continue - if type(children[0]) == Comment: + if isinstance(type(children[0]), Comment): dummy.decompose() for hrule_imposter in soup.find_all(class_="ilc_section_Separator"): diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index aed2069..46a8073 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -22,6 +22,7 @@ class IliasElementType(Enum): FOLDER = "folder" FORUM = "forum" LINK = "link" + LEARNING_MODULE = "learning_module" BOOKING = "booking" MEETING = "meeting" SURVEY = "survey" @@ -71,6 +72,14 @@ class IliasForumThread: mtime: Optional[datetime] +@dataclass +class IliasLearningModulePage: + title: str + content: Tag + next_url: Optional[str] + previous_url: Optional[str] + + class IliasPage: def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]): @@ -136,6 +145,34 @@ class IliasPage: return BeautifulSoup(raw_html, "html.parser") + def get_learning_module_data(self) -> Optional[IliasLearningModulePage]: + if not self._is_learning_module_page(): + return None + content = self._soup.select_one("#ilLMPageContent") + title = self._soup.select_one(".ilc_page_title_PageTitle").getText().strip() + return IliasLearningModulePage( + title=title, + content=content, + next_url=self._find_learning_module_next(), + previous_url=self._find_learning_module_prev() + ) + + def _find_learning_module_next(self) -> Optional[str]: + for link in self._soup.select("a.ilc_page_rnavlink_RightNavigationLink"): + url = self._abs_url_from_link(link) + if "baseClass=ilLMPresentationGUI" not in url: + continue + return url + return None + + def 
_find_learning_module_prev(self) -> Optional[str]: + for link in self._soup.select("a.ilc_page_lnavlink_LeftNavigationLink"): + url = self._abs_url_from_link(link) + if "baseClass=ilLMPresentationGUI" not in url: + continue + return url + return None + def get_download_forum_data(self) -> Optional[IliasDownloadForumData]: form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x}) if not form: @@ -222,6 +259,12 @@ class IliasPage: return False return "target=copa_" in link.get("value") + def _is_learning_module_page(self) -> bool: + link = self._soup.find(id="current_perma_link") + if not link: + return False + return "target=pg_" in link.get("value") + def _contains_collapsed_future_meetings(self) -> bool: return self._uncollapse_future_meetings_url() is not None @@ -812,6 +855,9 @@ class IliasPage: if "cmdClass=ilobjtestgui" in parsed_url.query: return IliasElementType.TEST + if "baseClass=ilLMPresentationGUI" in parsed_url.query: + return IliasElementType.LEARNING_MODULE + # Booking and Meeting can not be detected based on the link. They do have a ref_id though, so # try to guess it from the image. 
diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index ae49edc..f82d684 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -1,8 +1,11 @@ import asyncio +import base64 +import os import re from collections.abc import Awaitable, Coroutine from pathlib import PurePath -from typing import Any, Callable, Dict, List, Optional, Set, Union, cast +from typing import Any, Callable, Dict, List, Literal, Optional, Set, Union, cast +from urllib.parse import urljoin import aiohttp import yarl @@ -16,10 +19,10 @@ from ...output_dir import FileSink, Redownload from ...utils import fmt_path, soupify, url_set_query_param from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical from ..http_crawler import HttpCrawler, HttpCrawlerSection -from .file_templates import Links +from .file_templates import Links, learning_module_template from .ilias_html_cleaner import clean, insert_base_markup -from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement, - _sanitize_path_name, parse_ilias_forum_export) +from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage, + IliasPageElement, _sanitize_path_name, parse_ilias_forum_export) TargetType = Union[str, int] @@ -394,6 +397,8 @@ instance's greatest bottleneck. "[bright_black](surveys contain no relevant data)" ) return None + elif element.type == IliasElementType.LEARNING_MODULE: + return await self._handle_learning_module(element, element_path) elif element.type == IliasElementType.LINK: return await self._handle_link(element, element_path) elif element.type == IliasElementType.BOOKING: @@ -739,6 +744,135 @@ instance's greatest bottleneck. 
sink.file.write(content.encode("utf-8")) sink.done() + async def _handle_learning_module( + self, + element: IliasPageElement, + element_path: PurePath, + ) -> Optional[Coroutine[Any, Any, None]]: + maybe_cl = await self.crawl(element_path) + if not maybe_cl: + return None + return self._crawl_learning_module(element, maybe_cl) + + @_iorepeat(3, "crawling learning module") + @anoncritical + async def _crawl_learning_module(self, element: IliasPageElement, cl: CrawlToken) -> None: + elements: List[IliasLearningModulePage] = [] + + async with cl: + log.explain_topic(f"Parsing initial HTML page for {fmt_path(cl.path)}") + log.explain(f"URL: {element.url}") + soup = await self._get_page(element.url) + page = IliasPage(soup, element.url, None) + if next := page.get_learning_module_data(): + elements.extend(await self._crawl_learning_module_direction( + cl.path, next.previous_url, "left" + )) + elements.append(next) + elements.extend(await self._crawl_learning_module_direction( + cl.path, next.next_url, "right" + )) + + # Reflect their natural ordering in the file names + for index, lm_element in enumerate(elements): + lm_element.title = f"{index:02}_{lm_element.title}" + + tasks: List[Awaitable[None]] = [] + for index, elem in enumerate(elements): + prev_url = elements[index - 1].title if index > 0 else None + next_url = elements[index + 1].title if index < len(elements) - 1 else None + tasks.append(asyncio.create_task( + self._download_learning_module_page(cl.path, elem, prev_url, next_url) + )) + + # And execute them + await self.gather(tasks) + + async def _crawl_learning_module_direction( + self, + path: PurePath, + start_url: Optional[str], + dir: Union[Literal["left"], Literal["right"]] + ) -> List[IliasLearningModulePage]: + elements: List[IliasLearningModulePage] = [] + + if not start_url: + return elements + + next_element_url: Optional[str] = start_url + counter = 0 + while next_element_url: + log.explain_topic(f"Parsing HTML page for {fmt_path(path)} 
({dir}-{counter})") + log.explain(f"URL: {next_element_url}") + soup = await self._get_page(next_element_url) + page = IliasPage(soup, next_element_url, None) + if next := page.get_learning_module_data(): + elements.append(next) + if dir == "left": + next_element_url = next.previous_url + else: + next_element_url = next.next_url + counter += 1 + + return elements + + @anoncritical + @_iorepeat(3, "saving learning module page") + async def _download_learning_module_page( + self, + parent_path: PurePath, + element: IliasLearningModulePage, + prev: Optional[str], + next: Optional[str] + ) -> None: + path = parent_path / (_sanitize_path_name(element.title) + ".html") + maybe_dl = await self.download(path) + if not maybe_dl: + return + my_path = self._transformer.transform(maybe_dl.path) + if not my_path: + return + + if prev: + prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html")) + if prev_p: + prev = os.path.relpath(prev_p, my_path.parent) + else: + prev = None + if next: + next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html")) + if next_p: + next = os.path.relpath(next_p, my_path.parent) + else: + next = None + + async with maybe_dl as (bar, sink): + content = element.content + content = await self.internalize_images(content) + sink.file.write(learning_module_template(content, maybe_dl.path.name, prev, next).encode("utf-8")) + sink.done() + + async def internalize_images(self, tag: Tag) -> Tag: + """ + Tries to fetch ILIAS images and embed them as base64 data. 
+ """ + log.explain_topic("Internalizing images") + for elem in tag.find_all(recursive=True): + if not isinstance(elem, Tag): + continue + if elem.name == "img": + if src := elem.attrs.get("src", None): + url = urljoin(_ILIAS_URL, src) + if not url.startswith(_ILIAS_URL): + continue + log.explain(f"Internalizing {url!r}") + img = await self._get_authenticated(url) + elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode() + if elem.name == "iframe" and elem.attrs.get("src", "").startswith("//"): + # For unknown reasons the protocol seems to be stripped. + elem.attrs["src"] = "https:" + elem.attrs["src"] + return tag + async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup: auth_id = await self._current_auth_id() async with self.session.get(url) as request: @@ -772,7 +906,7 @@ instance's greatest bottleneck. self, url: str, data: dict[str, Union[str, List[str]]] - ) -> BeautifulSoup: + ) -> bytes: auth_id = await self._current_auth_id() form_data = aiohttp.FormData() @@ -792,6 +926,22 @@ instance's greatest bottleneck. return await request.read() raise CrawlError("post_authenticated failed even after authenticating") + async def _get_authenticated(self, url: str) -> bytes: + auth_id = await self._current_auth_id() + + async with self.session.get(url, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + + # We weren't authenticated, so try to do that + await self.authenticate(auth_id) + + # Retry once after authenticating. If this fails, we will die. + async with self.session.get(url, allow_redirects=False) as request: + if request.status == 200: + return await request.read() + raise CrawlError("get_authenticated failed even after authenticating") + # We repeat this as the login method in shibboleth doesn't handle I/O errors. # Shibboleth is quite reliable as well, the repeat is likely not critical here. 
@ _iorepeat(3, "Login", failure_is_error=True) From dbc2553b119c39c7a8ad196c6858fc8109f746a9 Mon Sep 17 00:00:00 2001 From: "Mr. Pine" <50425705+Mr-Pine@users.noreply.github.com> Date: Wed, 15 Mar 2023 15:33:42 +0100 Subject: [PATCH 041/147] Add default `show-not-deleted` option If set to `no`, PFERD won't print status or report messages for not deleted files --- CHANGELOG.md | 3 +++ CONFIG.md | 8 ++++++-- PFERD/__main__.py | 4 ++++ PFERD/cli/parser.py | 7 +++++++ PFERD/config.py | 3 +++ PFERD/logging.py | 20 ++++++++++++++++++++ PFERD/output_dir.py | 2 +- PFERD/pferd.py | 2 +- 8 files changed, 45 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e3925c..85513d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,9 @@ ambiguous situations. ### Added - `no-delete-prompt-override` conflict resolution strategy - support for ILIAS learning modules +- `show_not_deleted` option to stop printing the "Not Deleted" status or report + message. This combines nicely with the `no-delete-prompt-override` strategy, + causing PFERD to mostly ignore local-only files. ## 3.4.3 - 2022-11-29 diff --git a/CONFIG.md b/CONFIG.md index 84ee885..5f62749 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -26,6 +26,9 @@ default values for the other sections. `Added ...`) while running a crawler. (Default: `yes`) - `report`: Whether PFERD should print a report of added, changed and deleted local files for all crawlers before exiting. (Default: `yes`) +- `show_not_deleted`: Whether PFERD should print messages in status and report + when a local-only file wasn't deleted. Combines nicely with the + `no-delete-prompt-override` conflict resolution strategy. - `share_cookies`: Whether crawlers should share cookies where applicable. For example, some crawlers share cookies if they crawl the same website using the same account. (Default: `yes`) @@ -75,8 +78,9 @@ common to all crawlers: using `prompt` and always choosing "yes". 
- `no-delete`: Never delete local files, but overwrite local files if the remote file is different. - - `no-delete-prompt-overwrite`: Never delete local files, but prompt to overwrite local files if the - remote file is different. + - `no-delete-prompt-overwrite`: Never delete local files, but prompt to + overwrite local files if the remote file is different. Combines nicely + with the `show_not_deleted` option. - `transform`: Rules for renaming and excluding certain files and directories. For more details, see [this section](#transformation-rules). (Default: empty) - `tasks`: The maximum number of concurrent tasks (such as crawling or diff --git a/PFERD/__main__.py b/PFERD/__main__.py index 4faeb13..cb8c67c 100644 --- a/PFERD/__main__.py +++ b/PFERD/__main__.py @@ -47,6 +47,8 @@ def configure_logging_from_args(args: argparse.Namespace) -> None: log.output_explain = args.explain if args.status is not None: log.output_status = args.status + if args.show_not_deleted is not None: + log.output_not_deleted = args.show_not_deleted if args.report is not None: log.output_report = args.report @@ -72,6 +74,8 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N log.output_status = config.default_section.status() if args.report is None: log.output_report = config.default_section.report() + if args.show_not_deleted is None: + log.output_not_deleted = config.default_section.show_not_deleted() except ConfigOptionError as e: log.error(str(e)) sys.exit(1) diff --git a/PFERD/cli/parser.py b/PFERD/cli/parser.py index e753023..be483fd 100644 --- a/PFERD/cli/parser.py +++ b/PFERD/cli/parser.py @@ -215,6 +215,11 @@ PARSER.add_argument( action=BooleanOptionalAction, help="whether crawlers should share cookies where applicable" ) +PARSER.add_argument( + "--show-not-deleted", + action=BooleanOptionalAction, + help="print messages in status and report when PFERD did not delete a local only file" +) def load_default_section( @@ -233,6 +238,8 @@ def 
load_default_section( section["report"] = "yes" if args.report else "no" if args.share_cookies is not None: section["share_cookies"] = "yes" if args.share_cookies else "no" + if args.show_not_deleted is not None: + section["show_not_deleted"] = "yes" if args.show_not_deleted else "no" SUBPARSERS = PARSER.add_subparsers(title="crawlers") diff --git a/PFERD/config.py b/PFERD/config.py index 8f7e682..b2cff4e 100644 --- a/PFERD/config.py +++ b/PFERD/config.py @@ -82,6 +82,9 @@ class DefaultSection(Section): def report(self) -> bool: return self.s.getboolean("report", fallback=True) + def show_not_deleted(self) -> bool: + return self.s.getboolean("show_not_deleted", fallback=True) + def share_cookies(self) -> bool: return self.s.getboolean("share_cookies", fallback=True) diff --git a/PFERD/logging.py b/PFERD/logging.py index 340b21f..b958fb2 100644 --- a/PFERD/logging.py +++ b/PFERD/logging.py @@ -59,6 +59,7 @@ class Log: # Whether different parts of the output are enabled or disabled self.output_explain = False self.output_status = True + self.output_not_deleted = True self.output_report = True def _update_live(self) -> None: @@ -207,6 +208,17 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new action = escape(f"{action:<{self.STATUS_WIDTH}}") self.print(f"{style}{action}[/] {escape(text)} {suffix}") + def not_deleted(self, style: str, action: str, text: str, suffix: str = "") -> None: + """ + Print a message for a local only file that wasn't + deleted while crawling. Allows markup in the "style" + argument which will be applied to the "action" string. + """ + + if self.output_status and self.output_not_deleted: + action = escape(f"{action:<{self.STATUS_WIDTH}}") + self.print(f"{style}{action}[/] {escape(text)} {suffix}") + def report(self, text: str) -> None: """ Print a report after crawling. Allows markup. 
@@ -215,6 +227,14 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new if self.output_report: self.print(text) + def report_not_deleted(self, text: str) -> None: + """ + Print a report for a local only file that wasn't deleted after crawling. Allows markup. + """ + + if self.output_report and self.output_not_deleted: + self.print(text) + @contextmanager def _bar( self, diff --git a/PFERD/output_dir.py b/PFERD/output_dir.py index 38d1288..e9e9b93 100644 --- a/PFERD/output_dir.py +++ b/PFERD/output_dir.py @@ -496,7 +496,7 @@ class OutputDirectory: except OSError: pass else: - log.status("[bold bright_magenta]", "Not deleted", fmt_path(pure)) + log.not_deleted("[bold bright_magenta]", "Not deleted", fmt_path(pure)) self._report.not_delete_file(pure) def load_prev_report(self) -> None: diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 079053b..b30a04a 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -180,7 +180,7 @@ class Pferd: log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}") for path in sorted(crawler.report.not_deleted_files): something_changed = True - log.report(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") + log.report_not_deleted(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}") for warning in crawler.report.encountered_warnings: something_changed = True From b3d412360baeed6992535e6957d0bc1e368c337f Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 26 Aug 2023 23:48:14 +0200 Subject: [PATCH 042/147] Add Nix flake --- flake.lock | 27 +++++++++++++++++++++++++++ flake.nix | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..914c58b --- /dev/null +++ b/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1692986144, + "narHash": "sha256-M4VFpy7Av9j+33HF5nIGm0k2+DXXW4qSSKdidIKg5jY=", + "owner": 
"NixOS", + "repo": "nixpkgs", + "rev": "74e5bdc5478ebbe7ba5849f0d765f92757bb9dbf", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-23.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..e3d52af --- /dev/null +++ b/flake.nix @@ -0,0 +1,41 @@ +{ + description = "Tool for downloading course-related files from ILIAS"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-23.05"; + }; + + outputs = { self, nixpkgs }: + let + # Helper function to generate an attrset '{ x86_64-linux = f "x86_64-linux"; ... }'. + forAllSystems = nixpkgs.lib.genAttrs nixpkgs.lib.systems.flakeExposed; + in + { + packages = forAllSystems (system: + let pkgs = import nixpkgs { inherit system; }; + in + rec { + default = pkgs.python3Packages.buildPythonApplication rec { + pname = "pferd"; + # Performing black magic + # Don't worry, I sacrificed enough goats for the next few years + version = (pkgs.lib.importTOML ./PFERD/version.py).VERSION; + format = "pyproject"; + + src = ./.; + + nativeBuildInputs = with pkgs.python3Packages; [ + setuptools + ]; + + propagatedBuildInputs = with pkgs.python3Packages; [ + aiohttp + beautifulsoup4 + rich + keyring + certifi + ]; + }; + }); + }; +} From 2184ac804018e836e439e365ae2b0d184adae26d Mon Sep 17 00:00:00 2001 From: I-Al-Istannen Date: Sat, 26 Aug 2023 19:39:40 +0200 Subject: [PATCH 043/147] Add support for ILIAS mediacast listings --- CHANGELOG.md | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 110 +++++++++++++++------ PFERD/crawl/ilias/kit_ilias_web_crawler.py | 45 +++++---- 3 files changed, 107 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85513d2..d58ea18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ ambiguous situations. 
- `show_not_deleted` option to stop printing the "Not Deleted" status or report message. This combines nicely with the `no-delete-prompt-override` strategy, causing PFERD to mostly ignore local-only files. +- support for mediacast video listings ## 3.4.3 - 2022-11-29 diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index 46a8073..d5ea76d 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -3,7 +3,7 @@ import re from dataclasses import dataclass from datetime import date, datetime, timedelta from enum import Enum -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, cast from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup, Tag @@ -26,10 +26,12 @@ class IliasElementType(Enum): BOOKING = "booking" MEETING = "meeting" SURVEY = "survey" - VIDEO = "video" - VIDEO_PLAYER = "video_player" - VIDEO_FOLDER = "video_folder" - VIDEO_FOLDER_MAYBE_PAGINATED = "video_folder_maybe_paginated" + MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder" + MEDIACAST_VIDEO = "mediacast_video" + OPENCAST_VIDEO = "opencast_video" + OPENCAST_VIDEO_PLAYER = "opencast_video_player" + OPENCAST_VIDEO_FOLDER = "opencast_video_folder" + OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED = "opencast_video_folder_maybe_paginated" @dataclass @@ -45,7 +47,8 @@ class IliasPageElement: r"eid=(?P[0-9a-z\-]+)", r"file_(?P\d+)", r"ref_id=(?P\d+)", - r"target=[a-z]+_(?P\d+)" + r"target=[a-z]+_(?P\d+)", + r"mm_(?P\d+)" ] for regex in regexes: @@ -105,9 +108,9 @@ class IliasPage: if self._is_video_player(): log.explain("Page is a video player, extracting URL") return self._player_to_video() - if self._is_video_listing(): - log.explain("Page is a video listing, searching for elements") - return self._find_video_entries() + if self._is_opencast_video_listing(): + log.explain("Page is an opencast video listing, searching for elements") + return self._find_opencast_video_entries() if 
self._is_exercise_file(): log.explain("Page is an exercise, searching for elements") return self._find_exercise_entries() @@ -199,9 +202,9 @@ class IliasPage: if self._is_ilias_opencast_embedding(): log.explain("Unwrapping opencast embedding") return self.get_child_elements()[0] - if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED: + if self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED: log.explain("Unwrapping video pagination") - return self._find_video_entries_paginated()[0] + return self._find_opencast_video_entries_paginated()[0] if self._contains_collapsed_future_meetings(): log.explain("Requesting *all* future meetings") return self._uncollapse_future_meetings_url() @@ -219,7 +222,7 @@ class IliasPage: def _is_video_player(self) -> bool: return "paella_config_file" in str(self._soup) - def _is_video_listing(self) -> bool: + def _is_opencast_video_listing(self) -> bool: if self._is_ilias_opencast_embedding(): return True @@ -319,14 +322,14 @@ class IliasPage: # and just fetch the lone video url! if len(streams) == 1: video_url = streams[0]["sources"]["mp4"][0]["src"] - return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, self._source_name)] log.explain(f"Found multiple videos for stream at {self._source_name}") items = [] for stream in sorted(streams, key=lambda stream: stream["content"]): full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4" video_url = stream["sources"]["mp4"][0]["src"] - items.append(IliasPageElement(IliasElementType.VIDEO, video_url, full_name)) + items.append(IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, full_name)) return items @@ -385,7 +388,7 @@ class IliasPage: return items - def _find_video_entries(self) -> List[IliasPageElement]: + def _find_opencast_video_entries(self) -> List[IliasPageElement]: # ILIAS has three stages for video pages # 1. 
The initial dummy page without any videos. This page contains the link to the listing # 2. The video listing which might be paginated @@ -405,27 +408,27 @@ class IliasPage: query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"} url = url_set_query_params(url, query_params) log.explain("Found ILIAS video frame page, fetching actual content next") - return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED, url, "")] is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None - if is_paginated and not self._page_type == IliasElementType.VIDEO_FOLDER: + if is_paginated and not self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER: # We are in stage 2 - try to break pagination - return self._find_video_entries_paginated() + return self._find_opencast_video_entries_paginated() - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() - def _find_video_entries_paginated(self) -> List[IliasPageElement]: + def _find_opencast_video_entries_paginated(self) -> List[IliasPageElement]: table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+")) if table_element is None: log.warn("Couldn't increase elements per page (table not found). I might miss elements.") - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"]) if id_match is None: log.warn("Couldn't increase elements per page (table id not found). 
I might miss elements.") - return self._find_video_entries_no_paging() + return self._find_opencast_video_entries_no_paging() table_id = id_match.group(1) @@ -434,9 +437,9 @@ class IliasPage: url = url_set_query_params(self._page_url, query_params) log.explain("Disabled pagination, retrying folder as a new entry") - return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")] + return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER, url, "")] - def _find_video_entries_no_paging(self) -> List[IliasPageElement]: + def _find_opencast_video_entries_no_paging(self) -> List[IliasPageElement]: """ Crawls the "second stage" video page. This page contains the actual video urls. """ @@ -448,11 +451,11 @@ class IliasPage: results: List[IliasPageElement] = [] for link in video_links: - results.append(self._listed_video_to_element(link)) + results.append(self._listed_opencast_video_to_element(link)) return results - def _listed_video_to_element(self, link: Tag) -> IliasPageElement: + def _listed_opencast_video_to_element(self, link: Tag) -> IliasPageElement: # The link is part of a table with multiple columns, describing metadata. # 6th or 7th child (1 indexed) is the modification time string. 
Try to find it # by parsing backwards from the end and finding something that looks like a date @@ -479,7 +482,9 @@ class IliasPage: video_url = self._abs_url_from_link(link) log.explain(f"Found video {video_name!r} at {video_url}") - return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time) + return IliasPageElement( + IliasElementType.OPENCAST_VIDEO_PLAYER, video_url, video_name, modification_time + ) def _find_exercise_entries(self) -> List[IliasPageElement]: if self._soup.find(id="tab_submission"): @@ -622,9 +627,48 @@ class IliasPage: result.append(IliasPageElement(element_type, abs_url, element_name, description=description)) result += self._find_cards() + result += self._find_mediacast_videos() return result + def _find_mediacast_videos(self) -> List[IliasPageElement]: + videos: List[IliasPageElement] = [] + + for elem in cast(List[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")): + element_name = _sanitize_path_name( + elem.select_one(".ilPlayerPreviewDescription").getText().strip() + ) + if not element_name.endswith(".mp4"): + # just to make sure it has some kinda-alrightish ending + element_name = element_name + ".mp4" + video_element = elem.find(name="video") + if not video_element: + _unexpected_html_warning() + log.warn_contd(f"No