Patch summary
=============
Two related changes to the ILIAS crawler:

1. ilias_web_crawler.py: the root-course sanity check now looks for the
   substring "crs" in the permalink instead of "crs_".
2. kit_ilias_html.py: IliasPage.get_soup_permalink no longer reads the
   href of the ".il-footer-permanent-url > a" footer anchor. It now scans
   <script> elements (queried with src=None, presumably to select inline
   scripts only - TODO confirm against BeautifulSoup semantics) and
   returns the first regex match that looks like an
   ilias.studium.kit.edu URL terminated by a double quote.

Review notes (to be confirmed by the patch author):
- The returned value is the raw regex capture, so any "\/" escape
  sequences present in the script text are kept as-is in the result.
- The trailing "(\\\/.*)?" is greedy: on a line containing several
  double quotes the capture extends to the last one on that line.
- The optional "(?:[^.]+\.)?" prefix accepts any run of non-dot
  characters, so it is not limited to a host-name label.
- The host name ilias.studium.kit.edu is hard-coded in the pattern.
- NOTE: leading whitespace inside the hunks was reconstructed from the
  surrounding code conventions; verify against the target tree before
  applying.

diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py
index add49ee..c37148d 100644
--- a/PFERD/crawl/ilias/ilias_web_crawler.py
+++ b/PFERD/crawl/ilias/ilias_web_crawler.py
@@ -267,7 +267,7 @@ instance's greatest bottleneck.
                     # If we expect to find a root course, enforce it
                     if current_parent is None and expected_course_id is not None:
                         perma_link = IliasPage.get_soup_permalink(soup)
-                        if not perma_link or "crs_" not in perma_link:
+                        if not perma_link or "crs" not in perma_link:
                             raise CrawlError("Invalid course id? Didn't find anything looking like a course")
                         if str(expected_course_id) not in perma_link:
                             raise CrawlError(f"Expected course id {expected_course_id} but got {perma_link}")
diff --git a/PFERD/crawl/ilias/kit_ilias_html.py b/PFERD/crawl/ilias/kit_ilias_html.py
index 963ab05..7df49bc 100644
--- a/PFERD/crawl/ilias/kit_ilias_html.py
+++ b/PFERD/crawl/ilias/kit_ilias_html.py
@@ -1301,10 +1301,14 @@ class IliasPage:
 
     @staticmethod
     def get_soup_permalink(soup: BeautifulSoup) -> Optional[str]:
-        perma_link_element = cast(Tag, soup.select_one(".il-footer-permanent-url > a"))
-        if not perma_link_element or not perma_link_element.get("href"):
-            return None
-        return cast(Optional[str], perma_link_element.get("href"))
+        for script in soup.find_all("script", attrs={'src': cast(str, None)}):
+            match = re.search(
+                r"((?:https?:\\\/\\\/)?(?:[^.]+\.)?ilias\.studium\.kit\.edu(\\\/.*)?)\"",
+                script.text
+            )
+            if match is not None:
+                return match.group(1)
+        return None
 
 
 def _unexpected_html_warning() -> None: