diff --git a/CONFIG.md b/CONFIG.md index b87f75c..589883d 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -173,6 +173,7 @@ out of the box for the corresponding universities: | Uni Stuttgart | https://ilias3.uni-stuttgart.de | local | Uni_Stuttgart | | Uni Tübingen | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth | | | KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu | shibboleth | pilot | +| FAU StudOn | https://www.studon.fau.de/studon | simple-saml | StudOn | If your university isn't listed, try navigating to your instance's login page. Assuming no custom login service is used, the URL will look something like this: @@ -187,8 +188,9 @@ If the values work, feel free to submit a PR and add them to the table above. - `login_type`: How you authenticate. (Required) - `local`: Use `client_id` for authentication. - `shibboleth`: Use shibboleth for authentication. + - `simple-saml`: Use SimpleSAML based authentication. - `client_id`: An ID used for authentication if `login_type` is `local`. Is - ignored if `login_type` is `shibboleth`. + ignored if `login_type` is `shibboleth` or `simple-saml`. - `target`: The ILIAS element to crawl. 
(Required) - `desktop`: Crawl your personal desktop / dashboard - ``: Crawl the course with the given id diff --git a/LICENSE b/LICENSE index ccccbe3..6e965e3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim, thelukasprobst, Toorero, - Mr-Pine, p-fruck, PinieP + Mr-Pine, p-fruck, PinieP, NIKL45 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index b5041b3..bda3f39 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -31,6 +31,7 @@ from .kit_ilias_html import ( parse_ilias_forum_export, ) from .shibboleth_login import ShibbolethLogin +from .simplesaml_login import SimpleSAMLLogin TargetType = str | int @@ -48,12 +49,14 @@ class IliasWebCrawlerSection(HttpCrawlerSection): return base_url - def login(self) -> Literal["shibboleth"] | LoginTypeLocal: + def login(self) -> Literal["shibboleth", "simple-saml"] | LoginTypeLocal: login_type = self.s.get("login_type") if not login_type: self.missing_value("login_type") if login_type == "shibboleth": return "shibboleth" + if login_type == "simple-saml": + return "simple-saml" if login_type == "local": client_id = self.s.get("client_id") if not client_id: @@ -193,7 +196,14 @@ instance's greatest bottleneck. 
         if isinstance(self._login_type, LoginTypeLocal):
             self._client_id = self._login_type.client_id
         else:
-            self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
+            # Allow multiple remote login backends
+            if self._login_type == "shibboleth":
+                self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
+            elif self._login_type == "simple-saml":
+                self._simplesaml_login = SimpleSAMLLogin(self._base_url, self._auth, self._tfa_auth)
+            else:
+                # Defensive default; login() only ever returns the two values handled above
+                self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
 
         self._target = section.target()
         self._link_file_redirect_delay = section.link_redirect_delay()
@@ -1045,6 +1055,8 @@ instance's greatest bottleneck.
         # fill the session with the correct cookies
         if self._login_type == "shibboleth":
             await self._shibboleth_login.login(self.session)
+        elif self._login_type == "simple-saml":
+            await self._simplesaml_login.login(self.session)
         else:
             params = {
                 "client_id": self._client_id,
diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py
new file mode 100644
index 0000000..85c137f
--- /dev/null
+++ b/PFERD/crawl/ilias/simplesaml_login.py
@@ -0,0 +1,121 @@
+from typing import Any, Optional, cast
+
+import aiohttp
+import yarl
+from bs4 import BeautifulSoup, Tag
+
+from ...auth import Authenticator, TfaAuthenticator
+from ...logging import log
+from ...utils import soupify
+from ..crawler import CrawlError
+
+
+class SimpleSAMLLogin:
+    """
+    Login via a SimpleSAML system.
+
+    It performs a basic authentication by following the login redirect
+    and posting credentials to the indicated form. It also supports TFA similar to Shibboleth.
    """

    def __init__(
        self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator]
    ) -> None:
        # Base URL of the ILIAS instance; used as the "are we already home?"
        # prefix check in login() and to build the /saml.php entrypoint.
        self._ilias_url = ilias_url
        # Supplies username/password; invalidated and re-queried when an attempt fails.
        self._auth = authenticator
        # Optional TFA token source; lazily replaced by an anonymous prompt
        # authenticator in _authenticate_tfa if None.
        self._tfa_auth = tfa_authenticator

    async def login(self, sess: aiohttp.ClientSession) -> None:
        """
        Perform a SimpleSAML login flow and populate the session cookies.
        """

        # Start at the local login entrypoint which may redirect to SimpleSAML
        url = f"{self._ilias_url}/saml.php"
        async with sess.get(url) as response:
            saml_url = response.url
            # If the redirect stayed on the ILIAS host, assume we're already logged in
            if str(saml_url).startswith(self._ilias_url):
                log.explain("ILIAS recognized our SAML token and logged us in in the background, returning")
                return
            soup: BeautifulSoup = soupify(await response.read())

        # The SimpleSAML login page uses a form POST similar to Shibboleth.
        # Attempt to login using credentials. We loop until the IdP hands us a
        # page containing RelayState/SAMLResponse inputs (see _login_successful);
        # failed attempts invalidate the credentials at the end of the loop body
        # so the next iteration asks for them again.
        while not self._login_successful(soup):
            form = cast(Tag, soup.find("form", {"method": "post"}))
            action = cast(str, form["action"])
            # dynamically determine full URL from action (FAU uses full URL here, KIT uses relative URL)
            url = action if action.startswith("https") else str(saml_url.origin()) + action

            username, password = await self._auth.credentials()
            data = {
                "username": username,
                "password": password,
            }
            # SimpleSAML may protect the form with a CSRF token; echo it back if present
            if csrf_token_input := form.find("input", {"name": "csrf_token"}):
                data["csrf_token"] = csrf_token_input["value"]  # type: ignore

            soup = await _post(sess, url, data)

            # Detect attribute release prompt; we cannot answer it programmatically,
            # so the user has to confirm it once in a real browser.
            if soup.find(id="attributeRelease"):
                raise CrawlError(
                    "ILIAS SAML entitlements changed! Please log in once in your browser and review them"
                )

            if self._tfa_required(soup):
                soup = await self._authenticate_tfa(sess, soup, saml_url)

            if not self._login_successful(soup):
                self._auth.invalidate_credentials()

        # Equivalent: Being redirected via JS automatically
        # (or clicking "Continue" if you have JS disabled)
        relay_state = cast(Tag, soup.find("input", {"name": "RelayState"}))
        saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"}))
        url = cast(str, cast(Tag, soup.find("form", {"method": "post"}))["action"])
        data = {  # using the info obtained in the while loop above
            "RelayState": cast(str, relay_state["value"]),
            "SAMLResponse": cast(str, saml_response["value"]),
        }
        # NOTE(review): unlike the login form above, this action is posted as-is;
        # assumes the assertion-consumer form always uses an absolute URL — TODO confirm
        await sess.post(cast(str, url), data=data)

    async def _authenticate_tfa(
        self, session: aiohttp.ClientSession, soup: BeautifulSoup, saml_url: yarl.URL
    ) -> BeautifulSoup:
        """
        Submit a TFA one-time token to the SimpleSAML MFA form and return the
        resulting page as a soup.
        """
        if not self._tfa_auth:
            self._tfa_auth = TfaAuthenticator("ilias-anon-tfa")

        tfa_token = await self._tfa_auth.password()

        # Searching the form here so that this fails before asking for
        # credentials rather than after asking.
        form = cast(Tag, soup.find("form", {"method": "post"}))
        action = cast(str, form["action"])
        # dynamically determine full URL from action (FAU uses full URL here, KIT uses relative URL)
        url = action if action.startswith("https") else str(saml_url.origin()) + action

        data = {  # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?...
            "otp": tfa_token
        }
        if csrf_token_input := form.find("input", {"name": "csrf_token"}):
            data["csrf_token"] = csrf_token_input["value"]  # type: ignore
        return await _post(session, url, data)

    @staticmethod
    def _login_successful(soup: BeautifulSoup) -> bool:
        # Success is signalled by the IdP's final auto-submit page, which carries
        # the SAML assertion in hidden RelayState/SAMLResponse inputs.
        relay_state = soup.find("input", {"name": "RelayState"})
        saml_response = soup.find("input", {"name": "SAMLResponse"})
        return relay_state is not None and saml_response is not None

    @staticmethod
    def _tfa_required(soup: BeautifulSoup) -> bool:
        # Also treat a body with id="mfa:otp" as TFA required (for FAU)
        body = soup.find("body")
        return body is not None and body.get("id") == "mfa:otp"


async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup:
    """POST ``data`` to ``url`` with the given session and soupify the response body."""
    async with session.post(url, data=data) as response:
        return soupify(await response.read())