diff --git a/.gitignore b/.gitignore index 455eaca..36ab590 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /PFERD.egg-info/ __pycache__/ /.vscode/ +/.idea/ # pyinstaller /pferd.spec diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py index d8c347d..11f02fe 100644 --- a/PFERD/crawl/ilias/kit_ilias_html.py +++ b/PFERD/crawl/ilias/kit_ilias_html.py @@ -133,7 +133,7 @@ class IliasPage: # parse it json_object = json.loads(json_str) - streams = [stream for stream in json_object["streams"] if stream["type"] == "video"] + streams = [stream for stream in json_object["streams"] if stream["content"] == "presentation"] # and just fetch the lone video url! if len(streams) == 1: @@ -390,12 +390,14 @@ class IliasPage: # but some JS later transforms them into an accordion. # This is for these weird JS-y blocks - if "ilContainerItemsContainer" in parent.get("class"): + if "il_ContainerItemTitle" in parent.get("class"): # I am currently under the impression that *only* those JS blocks have an # ilNoDisplay class. if "ilNoDisplay" not in parent.get("class"): continue prev: Tag = parent.findPreviousSibling("div") + if not prev.get("class"): + continue if "ilContainerBlockHeader" in prev.get("class"): found_titles.append(prev.find("h3").getText().strip()) diff --git a/PFERD/crawl/ilias/kit_ilias_web_crawler.py b/PFERD/crawl/ilias/kit_ilias_web_crawler.py index c6115f4..da1bf2f 100644 --- a/PFERD/crawl/ilias/kit_ilias_web_crawler.py +++ b/PFERD/crawl/ilias/kit_ilias_web_crawler.py @@ -610,9 +610,9 @@ instance's greatest bottleneck. @staticmethod def _is_logged_in(soup: BeautifulSoup) -> bool: - # Normal ILIAS pages - userlog = soup.find("li", {"id": "userlog"}) - if userlog is not None: + # Normal ILIAS pages -- not sure if this will work on every page... + userlog = soup.find("span", {"class": "glyphicon-login"}) + if userlog is None: return True # Video listing embeds do not have complete ILIAS html. Try to match them by # their video listing table