From e49caa48776a02e10cd961e4cb67eba862ca58b5 Mon Sep 17 00:00:00 2001 From: Shirkanesi Date: Wed, 5 Jan 2022 21:52:00 +0100 Subject: [PATCH] Initial work on ILIAS7-support. Basic ('normal') elements seem to work. --- .gitignore | 1 + PFERD/crawl/ilias/kit_ilias_html.py | 6 ++++-- PFERD/crawl/ilias/kit_ilias_web_crawler.py | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 455eaca..36ab590 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /PFERD.egg-info/ __pycache__/ /.vscode/ +/.idea/ # pyinstaller /pferd.spec diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d8c347d..11f02fe 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -133,7 +133,7 @@ class IliasPage: # parse it json_object = json.loads(json_str) - streams = [stream for stream in json_object["streams"] if stream["type"] == "video"] + streams = [stream for stream in json_object["streams"] if stream["content"] == "presentation"] # and just fetch the lone video url! if len(streams) == 1: @@ -390,12 +390,14 @@ class IliasPage: # but some JS later transforms them into an accordion. # This is for these weird JS-y blocks - if "ilContainerItemsContainer" in parent.get("class"): + if "il_ContainerItemTitle" in parent.get("class"): # I am currently under the impression that *only* those JS blocks have an # ilNoDisplay class. if "ilNoDisplay" not in parent.get("class"): continue prev: Tag = parent.findPreviousSibling("div") + if not prev.get("class"): + continue if "ilContainerBlockHeader" in prev.get("class"): found_titles.append(prev.find("h3").getText().strip()) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c6115f4..da1bf2f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -610,9 +610,9 @@ instance's greatest bottleneck. @staticmethod def _is_logged_in(soup: BeautifulSoup) -> bool: - # Normal ILIAS pages - userlog = soup.find("li", {"id": "userlog"}) - if userlog is not None: + # Normal ILIAS pages -- not sure if this will work on every page... + userlog = soup.find("span", {"class": "glyphicon-login"}) + if userlog is None: return True # Video listing embeds do not have complete ILIAS html. Try to match them by # their video listing table