mirror of
https://github.com/Garmelon/PFERD.git
synced 2026-04-13 07:55:05 +02:00
update sync_url.py and fix event_loop housekeeping
This commit is contained in:
parent
2d6be9f5c1
commit
3eab236b99
3 changed files with 83 additions and 18 deletions
|
|
@ -136,6 +136,10 @@ class IliasCrawler:
|
||||||
self._authenticator = authenticator
|
self._authenticator = authenticator
|
||||||
self.dir_filter = dir_filter
|
self.dir_filter = dir_filter
|
||||||
|
|
||||||
|
# Setup authentication locks
|
||||||
|
self.auth_event = asyncio.Event()
|
||||||
|
self.auth_lock = asyncio.Lock()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _url_set_query_param(url: str, param: str, value: str) -> str:
|
def _url_set_query_param(url: str, param: str, value: str) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -188,7 +192,7 @@ class IliasCrawler:
|
||||||
"""
|
"""
|
||||||
Returns the name of the element at the given URL, if it can find one.
|
Returns the name of the element at the given URL, if it can find one.
|
||||||
"""
|
"""
|
||||||
focus_element: bs4.Tag = await self._get_page(url, {}).find(
|
focus_element: bs4.Tag = (await self._get_page(url, {})).find(
|
||||||
id="il_mhead_t_focus"
|
id="il_mhead_t_focus"
|
||||||
)
|
)
|
||||||
if not focus_element:
|
if not focus_element:
|
||||||
|
|
@ -274,10 +278,6 @@ class IliasCrawler:
|
||||||
):
|
):
|
||||||
crawl_queue = asyncio.Queue()
|
crawl_queue = asyncio.Queue()
|
||||||
|
|
||||||
# Setup authentication locks
|
|
||||||
self._auth_event = asyncio.Event()
|
|
||||||
self._auth_lock = asyncio.Lock()
|
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
crawl_queue.put_nowait(entry)
|
crawl_queue.put_nowait(entry)
|
||||||
|
|
||||||
|
|
@ -784,16 +784,16 @@ class IliasCrawler:
|
||||||
if self._is_logged_in(soup):
|
if self._is_logged_in(soup):
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
if self._auth_lock.locked():
|
if self.auth_lock.locked():
|
||||||
# Some other future is already logging in
|
# Some other future is already logging in
|
||||||
await self._auth_event.wait()
|
await self._auth_event.wait()
|
||||||
else:
|
else:
|
||||||
await self._auth_lock.acquire()
|
await self.auth_lock.acquire()
|
||||||
self._auth_event.clear()
|
self.auth_event.clear()
|
||||||
LOGGER.info("Not authenticated, changing that...")
|
LOGGER.info("Not authenticated, changing that...")
|
||||||
await self._authenticator.authenticate(self._client)
|
await self._authenticator.authenticate(self._client)
|
||||||
self._auth_event.set()
|
self.auth_event.set()
|
||||||
self._auth_lock.release()
|
self.auth_lock.release()
|
||||||
|
|
||||||
return await self._get_page(
|
return await self._get_page(
|
||||||
url,
|
url,
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,11 @@ class Pferd(Location):
|
||||||
self._test_run = test_run
|
self._test_run = test_run
|
||||||
self._ilias_targets: List[IliasTarget] = []
|
self._ilias_targets: List[IliasTarget] = []
|
||||||
|
|
||||||
|
# Initiate event loop
|
||||||
|
# This is needed because IliasCrawler sets up synchronization primitives
|
||||||
|
# which are tied to the event loop, so the loop must not change
|
||||||
|
self._loop = asyncio.get_event_loop()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def enable_logging() -> None:
|
def enable_logging() -> None:
|
||||||
"""
|
"""
|
||||||
|
|
@ -188,7 +193,7 @@ class Pferd(Location):
|
||||||
)
|
)
|
||||||
self._ilias_targets.append(target)
|
self._ilias_targets.append(target)
|
||||||
|
|
||||||
def add_ilias_folder(
|
def add_ilias_course(
|
||||||
self,
|
self,
|
||||||
ilias: IliasSycronizer,
|
ilias: IliasSycronizer,
|
||||||
target: PathLike,
|
target: PathLike,
|
||||||
|
|
@ -234,6 +239,53 @@ class Pferd(Location):
|
||||||
)
|
)
|
||||||
self._ilias_targets.append(target)
|
self._ilias_targets.append(target)
|
||||||
|
|
||||||
|
|
||||||
|
def add_ilias_folder(
|
||||||
|
self,
|
||||||
|
ilias: IliasSycronizer,
|
||||||
|
target: PathLike,
|
||||||
|
full_url: str,
|
||||||
|
transform: Transform = lambda x: x,
|
||||||
|
download_strategy: IliasDownloadStrategy = download_modified_or_new,
|
||||||
|
clean: bool = True,
|
||||||
|
timeout: int = 5,
|
||||||
|
file_conflict_resolver: FileConflictResolver = resolve_prompt_user
|
||||||
|
) -> Organizer:
|
||||||
|
"""
|
||||||
|
Synchronizes a folder with a given folder on the given ILIAS instance.
|
||||||
|
Arguments:
|
||||||
|
ilias {IliasSycronizer} -- the ILIAS Instance
|
||||||
|
target {Path} -- the target path to write the data to
|
||||||
|
full_url {str} -- the full url of the folder/videos/course to crawl
|
||||||
|
Keyword Arguments:
|
||||||
|
transform {Transform} -- A transformation function for the output paths. Return None
|
||||||
|
to ignore a file. (default: {lambda x: x})
|
||||||
|
download_strategy {DownloadStrategy} -- A function to determine which files need to
|
||||||
|
be downloaded. Can save bandwidth and reduce the number of requests.
|
||||||
|
(default: {download_modified_or_new})
|
||||||
|
clean {bool} -- Whether to clean up when the method finishes.
|
||||||
|
timeout {int} -- The download timeout for opencast videos. Sadly needed due to a
|
||||||
|
requests bug.
|
||||||
|
file_conflict_resolver {FileConflictResolver} -- A function specifying how to deal
|
||||||
|
with overwriting or deleting files. The default always asks the user.
|
||||||
|
"""
|
||||||
|
PRETTY.starting_synchronizer(target, "ILIAS", "An ILIAS element by url")
|
||||||
|
|
||||||
|
|
||||||
|
results = ilias.add_target(
|
||||||
|
lambda crawler: crawler.recursive_crawl_url(full_url),
|
||||||
|
)
|
||||||
|
target = IliasTarget(
|
||||||
|
results,
|
||||||
|
target,
|
||||||
|
transform,
|
||||||
|
download_strategy,
|
||||||
|
clean,
|
||||||
|
timeout,
|
||||||
|
file_conflict_resolver,
|
||||||
|
)
|
||||||
|
self._ilias_targets.append(target)
|
||||||
|
|
||||||
async def _syncronize_ilias(self, ilias: IliasSycronizer):
|
async def _syncronize_ilias(self, ilias: IliasSycronizer):
|
||||||
await ilias.syncronize()
|
await ilias.syncronize()
|
||||||
|
|
||||||
|
|
@ -279,7 +331,7 @@ class Pferd(Location):
|
||||||
Arguments:
|
Arguments:
|
||||||
ilias {IliasSycronizer} -- the ILIAS Instance
|
ilias {IliasSycronizer} -- the ILIAS Instance
|
||||||
"""
|
"""
|
||||||
asyncio.run(self._syncronize_ilias(ilias))
|
self._loop.run_until_complete(self._syncronize_ilias(ilias))
|
||||||
|
|
||||||
def print_summary(self) -> None:
|
def print_summary(self) -> None:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
25
sync_url.py
25
sync_url.py
|
|
@ -4,6 +4,7 @@
|
||||||
A simple script to download a course by name from ILIAS.
|
A simple script to download a course by name from ILIAS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
@ -86,7 +87,11 @@ def main() -> None:
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
|
cookie_jar = CookieJar(to_path(args.cookies) if args.cookies else None)
|
||||||
client = cookie_jar.create_client()
|
client = cookie_jar.create_async_client()
|
||||||
|
|
||||||
|
if not args.url.startswith("https://ilias.studium.kit.edu"):
|
||||||
|
_PRETTY.error("Not a valid KIT ILIAS URL")
|
||||||
|
return
|
||||||
|
|
||||||
if args.keyring:
|
if args.keyring:
|
||||||
if not args.username:
|
if not args.username:
|
||||||
|
|
@ -103,13 +108,14 @@ def main() -> None:
|
||||||
|
|
||||||
url = urlparse(args.url)
|
url = urlparse(args.url)
|
||||||
|
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
crawler = IliasCrawler(url.scheme + '://' + url.netloc, client,
|
crawler = IliasCrawler(url.scheme + '://' + url.netloc, client,
|
||||||
authenticator, lambda x, y: True)
|
authenticator, lambda x, y: True)
|
||||||
|
|
||||||
cookie_jar.load_cookies()
|
cookie_jar.load_cookies()
|
||||||
|
|
||||||
if args.folder is None:
|
if args.folder is None:
|
||||||
element_name = crawler.find_element_name(args.url)
|
element_name = loop.run_until_complete(crawler.find_element_name(args.url))
|
||||||
if not element_name:
|
if not element_name:
|
||||||
print("Error, could not get element name. Please specify a folder yourself.")
|
print("Error, could not get element name. Please specify a folder yourself.")
|
||||||
return
|
return
|
||||||
|
|
@ -142,17 +148,24 @@ def main() -> None:
|
||||||
pferd.enable_logging()
|
pferd.enable_logging()
|
||||||
|
|
||||||
# fetch
|
# fetch
|
||||||
pferd.ilias_kit_folder(
|
|
||||||
target=target,
|
ilias = pferd.ilias_kit(
|
||||||
full_url=args.url,
|
|
||||||
cookies=args.cookies,
|
|
||||||
dir_filter=dir_filter,
|
dir_filter=dir_filter,
|
||||||
|
cookies=args.cookies,
|
||||||
username=username,
|
username=username,
|
||||||
password=password,
|
password=password,
|
||||||
|
)
|
||||||
|
|
||||||
|
pferd.add_ilias_folder(
|
||||||
|
ilias=ilias,
|
||||||
|
target=target,
|
||||||
|
full_url=args.url,
|
||||||
file_conflict_resolver=file_confilict_resolver,
|
file_conflict_resolver=file_confilict_resolver,
|
||||||
transform=sanitize_windows_path
|
transform=sanitize_windows_path
|
||||||
)
|
)
|
||||||
|
|
||||||
|
pferd.syncronize_ilias(ilias)
|
||||||
|
|
||||||
pferd.print_summary()
|
pferd.print_summary()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue