From 3eab236b99872af3817486ca21c24f5cbd3073d3 Mon Sep 17 00:00:00 2001 From: be7a Date: Wed, 28 Apr 2021 23:58:21 +0200 Subject: [PATCH] update sync_url.py and fix event_loop housekeeping --- PFERD/ilias/crawler.py | 20 +++++++-------- PFERD/pferd.py | 56 ++++++++++++++++++++++++++++++++++++++++-- sync_url.py | 25 ++++++++++++++----- 3 files changed, 83 insertions(+), 18 deletions(-) diff --git a/PFERD/ilias/crawler.py b/PFERD/ilias/crawler.py index 66e876d..1f89e07 100644 --- a/PFERD/ilias/crawler.py +++ b/PFERD/ilias/crawler.py @@ -136,6 +136,10 @@ class IliasCrawler: self._authenticator = authenticator self.dir_filter = dir_filter + # Setup authentication locks + self.auth_event = asyncio.Event() + self.auth_lock = asyncio.Lock() + @staticmethod def _url_set_query_param(url: str, param: str, value: str) -> str: """ @@ -188,7 +192,7 @@ class IliasCrawler: """ Returns the name of the element at the given URL, if it can find one. """ - focus_element: bs4.Tag = await self._get_page(url, {}).find( + focus_element: bs4.Tag = (await self._get_page(url, {})).find( id="il_mhead_t_focus" ) if not focus_element: @@ -274,10 +278,6 @@ class IliasCrawler: ): crawl_queue = asyncio.Queue() - # Setup authentication locks - self._auth_event = asyncio.Event() - self._auth_lock = asyncio.Lock() - for entry in entries: crawl_queue.put_nowait(entry) @@ -784,16 +784,16 @@ class IliasCrawler: if self._is_logged_in(soup): return soup - if self._auth_lock.locked(): + if self.auth_lock.locked(): # Some other future is already logging in await self._auth_event.wait() else: - await self._auth_lock.acquire() - self._auth_event.clear() + await self.auth_lock.acquire() + self.auth_event.clear() LOGGER.info("Not authenticated, changing that...") await self._authenticator.authenticate(self._client) - self._auth_event.set() - self._auth_lock.release() + self.auth_event.set() + self.auth_lock.release() return await self._get_page( url, diff --git a/PFERD/pferd.py b/PFERD/pferd.py index 
a82f069..94ab93b 100644 --- a/PFERD/pferd.py +++ b/PFERD/pferd.py @@ -94,6 +94,11 @@ class Pferd(Location): self._test_run = test_run self._ilias_targets: List[IliasTarget] = [] + # Initialize the event loop + # This is needed because IliasCrawler sets up synchronization primitives + # which are tied to the event loop, so it shouldn't change + self._loop = asyncio.get_event_loop() + @staticmethod def enable_logging() -> None: """ @@ -188,7 +193,7 @@ class Pferd(Location): ) self._ilias_targets.append(target) - def add_ilias_folder( + def add_ilias_course( self, ilias: IliasSycronizer, target: PathLike, @@ -234,6 +239,53 @@ class Pferd(Location): ) self._ilias_targets.append(target) + + def add_ilias_folder( + self, + ilias: IliasSycronizer, + target: PathLike, + full_url: str, + transform: Transform = lambda x: x, + download_strategy: IliasDownloadStrategy = download_modified_or_new, + clean: bool = True, + timeout: int = 5, + file_conflict_resolver: FileConflictResolver = resolve_prompt_user + ) -> Organizer: + """ + Synchronizes a folder with a given folder on the given ILIAS instance. + Arguments: + ilias {IliasSycronizer} -- the ILIAS Instance + target {Path} -- the target path to write the data to + full_url {str} -- the full url of the folder/videos/course to crawl + Keyword Arguments: + transform {Transform} -- A transformation function for the output paths. Return None + to ignore a file. (default: {lambdax:x}) + download_strategy {DownloadStrategy} -- A function to determine which files need to + be downloaded. Can save bandwidth and reduce the number of requests. + (default: {download_modified_or_new}) + clean {bool} -- Whether to clean up when the method finishes. + timeout {int} -- The download timeout for opencast videos. Sadly needed due to a + requests bug. + file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal + with overwriting or deleting files. The default always asks the user. 
+ """ + PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url") + + + results = ilias.add_target( + lambda crawler: crawler.recursive_crawl_url(full_url), + ) + target = IliasTarget( + results, + target, + transform, + download_strategy, + clean, + timeout, + file_conflict_resolver, + ) + self._ilias_targets.append(target) + async def _syncronize_ilias(self, ilias: IliasSycronizer): await ilias.syncronize() @@ -279,7 +331,7 @@ class Pferd(Location): Arguments: ilias {IliasSycronizer} -- the ILIAS Instance """ - asyncio.run(self._syncronize_ilias(ilias)) + self._loop.run_until_complete(self._syncronize_ilias(ilias)) def print_summary(self) -> None: """ diff --git a/sync_url.py b/sync_url.py index 2ccbc95..9252776 100755 --- a/sync_url.py +++ b/sync_url.py @@ -4,6 +4,7 @@ A simple script to download a course by name from ILIAS. """ +import asyncio import argparse import logging import sys @@ -86,7 +87,11 @@ def main() -> None: args = parser.parse_args() cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None) - client = cookie_jar.create_client() + client = cookie_jar.create_async_client() + + if not args.url.startswith("https://ilias.studium.kit.edu"): + _PRETTY.error("Not a valid KIT ILIAS URL") + return if args.keyring: if not args.username: @@ -103,13 +108,14 @@ def main() -> None: url = urlparse(args.url) + loop = asyncio.get_event_loop() crawler = IliasCrawler(url.scheme + '://' + url.netloc, client, authenticator, lambda x, y: True) cookie_jar.load_cookies() if args.folder is None: - element_name = crawler.find_element_name(args.url) + element_name = loop.run_until_complete(crawler.find_element_name(args.url)) if not element_name: print("Error, could not get element name. 
Please specify a folder yourself.") return @@ -142,17 +148,24 @@ def main() -> None: pferd.enable_logging() # fetch - pferd.ilias_kit_folder( - target=target, - full_url=args.url, - cookies=args.cookies, + + ilias = pferd.ilias_kit( dir_filter=dir_filter, + cookies=args.cookies, username=username, password=password, + ) + + pferd.add_ilias_folder( + ilias=ilias, + target=target, + full_url=args.url, file_conflict_resolver=file_confilict_resolver, transform=sanitize_windows_path ) + pferd.syncronize_ilias(ilias) + pferd.print_summary()