From 998abc3afd10b7ae387d5a4b1728c17fb047a362 Mon Sep 17 00:00:00 2001
From: PinieP <59698589+PinieP@users.noreply.github.com>
Date: Sat, 2 Nov 2024 21:54:57 +0100
Subject: [PATCH] Fix line wrapping

---
 PFERD/crawl/ilias/ilias_web_crawler.py | 87 +++++++++-----------------
 PFERD/crawl/ilias/shibboleth_login.py  |  3 +-
 2 files changed, 30 insertions(+), 60 deletions(-)

diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py
index fe94d39..e6887c3 100644
--- a/PFERD/crawl/ilias/ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/ilias_web_crawler.py
@@ -53,8 +53,7 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
                 self.missing_value("client_id")
             return LoginTypeLocal(client_id)
 
-        self.invalid_value("login_type", login_type,
-                           "Should be <shibboleth | local>")
+        self.invalid_value("login_type", login_type, "Should be <shibboleth | local>")
 
     def tfa_auth(
         self, authenticators: Dict[str, Authenticator]
@@ -64,8 +63,7 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
             return None
         auth = authenticators.get(value)
         if auth is None:
-            self.invalid_value("tfa_auth", value,
-                               "No such auth section exists")
+            self.invalid_value("tfa_auth", value, "No such auth section exists")
         return auth
 
     def target(self) -> TargetType:
@@ -83,8 +81,7 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
             # URL
             return target
 
-        self.invalid_value(
-            "target", target, "Should be <course id | desktop | kit ilias URL>")
+        self.invalid_value("target", target, "Should be <course id | desktop | kit ilias URL>")
 
     def links(self) -> Links:
         type_str: Optional[str] = self.s.get("links")
@@ -200,8 +197,7 @@ instance's greatest bottleneck.
 
     async def _run(self) -> None:
         if isinstance(self._target, int):
-            log.explain_topic(
-                f"Inferred crawl target: Course with id {self._target}")
+            log.explain_topic(f"Inferred crawl target: Course with id {self._target}")
             await self._crawl_course(self._target)
         elif self._target == "desktop":
             log.explain_topic("Inferred crawl target: Personal desktop")
@@ -400,8 +396,7 @@ instance's greatest bottleneck.
             return None
         else:
             log.explain("Answer: Yes")
-        element_path = element_path.with_name(
-            element_path.name + link_extension)
+        element_path = element_path.with_name(element_path.name + link_extension)
 
         maybe_dl = await self.download(element_path, mtime=element.mtime)
         if not maybe_dl:
@@ -413,11 +408,9 @@ instance's greatest bottleneck.
     @_iorepeat(3, "resolving link")
     async def _download_link(self, element: IliasPageElement, link_template: str, dl: DownloadToken) -> None:
         async with dl as (bar, sink):
-            export_url = element.url.replace(
-                "cmd=calldirectlink", "cmd=exportHTML")
+            export_url = element.url.replace("cmd=calldirectlink", "cmd=exportHTML")
             real_url = await self._resolve_link_target(export_url)
-            self._write_link_content(
-                link_template, real_url, element.name, element.description, sink)
+            self._write_link_content(link_template, real_url, element.name, element.description, sink)
 
     def _write_link_content(
         self,
@@ -431,8 +424,7 @@ instance's greatest bottleneck.
         content = content.replace("{{link}}", url)
         content = content.replace("{{name}}", name)
         content = content.replace("{{description}}", str(description))
-        content = content.replace(
-            "{{redirect_delay}}", str(self._link_file_redirect_delay))
+        content = content.replace("{{redirect_delay}}", str(self._link_file_redirect_delay))
         sink.file.write(content.encode("utf-8"))
         sink.done()
 
@@ -451,8 +443,7 @@ instance's greatest bottleneck.
             return None
         else:
             log.explain("Answer: Yes")
-        element_path = element_path.with_name(
-            element_path.name + link_extension)
+        element_path = element_path.with_name(element_path.name + link_extension)
 
         maybe_dl = await self.download(element_path, mtime=element.mtime)
         if not maybe_dl:
@@ -485,8 +476,7 @@ instance's greatest bottleneck.
         dl: DownloadToken,
     ) -> None:
         async with dl as (bar, sink):
-            self._write_link_content(
-                link_template, element.url, element.name, element.description, sink)
+            self._write_link_content(link_template, element.url, element.name, element.description, sink)
 
     async def _resolve_link_target(self, export_url: str) -> str:
         async def impl() -> Optional[str]:
@@ -510,8 +500,7 @@ instance's greatest bottleneck.
         if target is not None:
             return target
 
-        raise CrawlError(
-            "resolve_link_target failed even after authenticating")
+        raise CrawlError("resolve_link_target failed even after authenticating")
 
     async def _handle_opencast_video(
         self,
@@ -522,8 +511,7 @@ instance's greatest bottleneck.
         if self.prev_report:
             self.report.add_custom_value(
                 _get_video_cache_key(element),
-                self.prev_report.get_custom_value(
-                    _get_video_cache_key(element))
+                self.prev_report.get_custom_value(_get_video_cache_key(element))
             )
 
         # A video might contain other videos, so let's "crawl" the video first
@@ -544,8 +532,7 @@ instance's greatest bottleneck.
             # Mark all existing videos as known to ensure they do not get deleted during cleanup.
             # We "downloaded" them, just without actually making a network request as we assumed
             # they did not change.
-            contained = self._previous_contained_opencast_videos(
-                element, maybe_dl.path)
+            contained = self._previous_contained_opencast_videos(element, maybe_dl.path)
             if len(contained) > 1:
                 # Only do this if we threw away the original dl token,
                 # to not download single-stream videos twice
@@ -561,8 +548,7 @@ instance's greatest bottleneck.
     ) -> List[PurePath]:
         if not self.prev_report:
             return []
-        custom_value = self.prev_report.get_custom_value(
-            _get_video_cache_key(element))
+        custom_value = self.prev_report.get_custom_value(_get_video_cache_key(element))
         if not custom_value:
             return []
         cached_value = cast(dict[str, Any], custom_value)
@@ -584,11 +570,9 @@ instance's greatest bottleneck.
                 f"The following contained videos are known: {','.join(map(fmt_path, contained_videos))}"
             )
             if all(self._output_dir.resolve(path).exists() for path in contained_videos):
-                log.explain(
-                    "Found all known videos locally, skipping enumeration request")
+                log.explain("Found all known videos locally, skipping enumeration request")
                 return True
-            log.explain(
-                "Missing at least one video, continuing with requests!")
+            log.explain("Missing at least one video, continuing with requests!")
         else:
             log.explain("No local cache present")
         return False
@@ -599,8 +583,7 @@ instance's greatest bottleneck.
         def add_to_report(paths: list[str]) -> None:
             self.report.add_custom_value(
                 _get_video_cache_key(element),
-                {"known_paths": paths, "own_path": str(
-                    self._transformer.transform(dl.path))}
+                {"known_paths": paths, "own_path": str(self._transformer.transform(dl.path))}
             )
 
         async with dl as (bar, sink):
@@ -628,8 +611,7 @@ instance's greatest bottleneck.
                 continue
             async with maybe_dl as (bar, sink):
                 log.explain(f"Streaming video from real url {stream_element.url}")
-                contained_video_paths.append(
-                    str(self._transformer.transform(maybe_dl.path)))
+                contained_video_paths.append(str(self._transformer.transform(maybe_dl.path)))
                 await self._stream_from_url(stream_element.url, sink, bar, is_video=True)
 
         add_to_report(contained_video_paths)
@@ -749,8 +731,7 @@ instance's greatest bottleneck.
 
         tasks: List[Awaitable[None]] = []
         for elem in elements:
-            tasks.append(asyncio.create_task(
-                self._download_forum_thread(cl.path, elem)))
+            tasks.append(asyncio.create_task(self._download_forum_thread(cl.path, elem)))
 
         # And execute them
         await self.gather(tasks)
@@ -811,11 +792,9 @@ instance's greatest bottleneck.
         tasks: List[Awaitable[None]] = []
         for index, elem in enumerate(elements):
             prev_url = elements[index - 1].title if index > 0 else None
-            next_url = elements[index +
-                                1].title if index < len(elements) - 1 else None
+            next_url = elements[index + 1].title if index < len(elements) - 1 else None
             tasks.append(asyncio.create_task(
-                self._download_learning_module_page(
-                    cl.path, elem, prev_url, next_url)
+                self._download_learning_module_page(cl.path, elem, prev_url, next_url)
             ))
 
         # And execute them
@@ -868,15 +847,13 @@ instance's greatest bottleneck.
             return
 
         if prev:
-            prev_p = self._transformer.transform(
-                parent_path / (_sanitize_path_name(prev) + ".html"))
+            prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html"))
             if prev_p:
                 prev = os.path.relpath(prev_p, my_path.parent)
             else:
                 prev = None
         if next:
-            next_p = self._transformer.transform(
-                parent_path / (_sanitize_path_name(next) + ".html"))
+            next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html"))
             if next_p:
                 next = os.path.relpath(next_p, my_path.parent)
             else:
@@ -885,8 +862,7 @@ instance's greatest bottleneck.
         async with maybe_dl as (bar, sink):
             content = element.content
             content = await self.internalize_images(content)
-            sink.file.write(learning_module_template(
-                content, maybe_dl.path.name, prev, next).encode("utf-8"))
+            sink.file.write(learning_module_template(content, maybe_dl.path.name, prev, next).encode("utf-8"))
             sink.done()
 
     async def internalize_images(self, tag: Tag) -> Tag:
@@ -904,8 +880,7 @@ instance's greatest bottleneck.
                         continue
                     log.explain(f"Internalizing {url!r}")
                     img = await self._get_authenticated(url)
-                    elem.attrs["src"] = "data:;base64," + \
-                        base64.b64encode(img).decode()
+                    elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode()
             if elem.name == "iframe" and elem.attrs.get("src", "").startswith("//"):
                 # For unknown reasons the protocol seems to be stripped.
                 elem.attrs["src"] = "https:" + elem.attrs["src"]
@@ -935,8 +910,7 @@ instance's greatest bottleneck.
             soup = soupify(await request.read())
             if IliasPage.is_logged_in(soup):
                 return self._verify_page(soup, url, root_page_allowed)
-        raise CrawlError(
-            f"get_page failed even after authenticating on {url!r}")
+        raise CrawlError(f"get_page failed even after authenticating on {url!r}")
 
     @staticmethod
     def _verify_page(soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup:
@@ -1007,13 +981,11 @@ instance's greatest bottleneck.
 
             login_form = login_page.find("form", attrs={"name": "formlogin"})
             if login_form is None:
-                raise CrawlError(
-                    "Could not find the login form! Specified client id might be invalid.")
+                raise CrawlError("Could not find the login form! Specified client id might be invalid.")
 
             login_url = login_form.attrs.get("action")
             if login_url is None:
-                raise CrawlError(
-                    "Could not find the action URL in the login form!")
+                raise CrawlError("Could not find the action URL in the login form!")
 
             username, password = await self._auth.credentials()
 
@@ -1034,8 +1006,7 @@ instance's greatest bottleneck.
         # Normal ILIAS pages
         mainbar: Optional[Tag] = soup.find(class_="il-maincontrols-metabar")
         if mainbar is not None:
-            login_button = mainbar.find(
-                attrs={"href": lambda x: x and "login.php" in x})
+            login_button = mainbar.find(attrs={"href": lambda x: x and "login.php" in x})
             shib_login = soup.find(id="button_shib_login")
             return not login_button and not shib_login
 
diff --git a/PFERD/crawl/ilias/shibboleth_login.py b/PFERD/crawl/ilias/shibboleth_login.py
index 42257bd..28f23c2 100644
--- a/PFERD/crawl/ilias/shibboleth_login.py
+++ b/PFERD/crawl/ilias/shibboleth_login.py
@@ -48,8 +48,7 @@ class ShibbolethLogin:
         while not self._login_successful(soup):
             # Searching the form here so that this fails before asking for
             # credentials rather than after asking.
-            form = soup.find(
-                "form", {"method": "post"})
+            form = soup.find("form", {"method": "post"})
             action = form["action"]
 
             # Equivalent: Enter credentials in