From 77c6e7d8168b9530e6fe88b9ca908ef1dc9e301c Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 21:25:07 +0200 Subject: [PATCH 1/5] created class SimpleSAMLLogin by duplicating the class ShibbolethLogin and making the two changes mentioned in Garmelon/PFERD/issues/126. TFA not tested yet. --- PFERD/crawl/ilias/simplesaml_login.py | 121 ++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 PFERD/crawl/ilias/simplesaml_login.py diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py new file mode 100644 index 0000000..d6a629b --- /dev/null +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -0,0 +1,121 @@ +from typing import Any, Optional, cast + +import aiohttp +import yarl +from bs4 import BeautifulSoup, Tag + +from ...auth import Authenticator, TfaAuthenticator +from ...logging import log +from ...utils import soupify +from ..crawler import CrawlError + + +class SimpleSAMLLogin: + """ + Login via a SimpleSAML system. + + It performs a basic authentication by following the login redirect + and posting credentials to the indicated form. It also supports TFA similar to Shibboleth. + """ + + def __init__( + self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator] + ) -> None: + self._ilias_url = ilias_url + self._auth = authenticator + self._tfa_auth = tfa_authenticator + + async def login(self, sess: aiohttp.ClientSession) -> None: + """ + Perform a SimpleSAML login flow and populate the session cookies. 
+ """ + + # Start at the local login entrypoint which may redirect to SimpleSAML + url = f"{self._ilias_url}/saml.php" + async with sess.get(url) as response: + saml_url = response.url + # If the redirect stayed on the ILIAS host, assume we're already logged in + if str(saml_url).startswith(self._ilias_url): + log.explain("ILIAS recognized our simple-saml token and logged us in in the background, returning") + return + soup: BeautifulSoup = soupify(await response.read()) + + # The SimpleSAML login page uses a form POST similar to Shibboleth. + # Attempt to login using credentials. + while not self._login_successful(soup): + form = cast(Tag, soup.find("form", {"method": "post"})) + action = cast(str, form["action"]) + if action.startswith("https"): # FAU uses full URL here + url = action + else: + url = str(saml_url.origin()) + action #KIT uses relative URL here + + username, password = await self._auth.credentials() + data = { + "username": username, + "password": password, + } + if csrf_token_input := form.find("input", {"name": "csrf_token"}): + data["csrf_token"] = csrf_token_input["value"] # type: ignore + + soup = await _post(sess, url, data) + + # Detect attribute release prompt + if soup.find(id="attributeRelease"): + raise CrawlError( + "ILIAS SimpleSAML entitlements changed! 
Please log in once in your browser and review them" + ) + + if self._tfa_required(soup): + soup = await self._authenticate_tfa(sess, soup, saml_url) + + if not self._login_successful(soup): + self._auth.invalidate_credentials() + + # Equivalent: Being redirected via JS automatically + # (or clicking "Continue" if you have JS disabled) + relay_state = cast(Tag, soup.find("input", {"name": "RelayState"})) + saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"})) + url = cast(str, cast(Tag, soup.find("form", {"method": "post"}))["action"]) + data = { # using the info obtained in the while loop above + "RelayState": cast(str, relay_state["value"]), + "SAMLResponse": cast(str, saml_response["value"]), + } + await sess.post(cast(str, url), data=data) + + async def _authenticate_tfa( + self, session: aiohttp.ClientSession, soup: BeautifulSoup, saml_url: yarl.URL + ) -> BeautifulSoup: + if not self._tfa_auth: + self._tfa_auth = TfaAuthenticator("ilias-anon-tfa") + + tfa_token = await self._tfa_auth.password() + + # Searching the form here so that this fails before asking for + # credentials rather than after asking. 
+ form = cast(Tag, soup.find("form", {"method": "post"})) + action = cast(str, form["action"]) + + url = str(saml_url.origin()) + action + data = { + "_eventId_proceed": "", + "fudis_otp_input": tfa_token, + } + if csrf_token_input := form.find("input", {"name": "csrf_token"}): + data["csrf_token"] = csrf_token_input["value"] # type: ignore + return await _post(session, url, data) + + @staticmethod + def _login_successful(soup: BeautifulSoup) -> bool: + relay_state = soup.find("input", {"name": "RelayState"}) + saml_response = soup.find("input", {"name": "SAMLResponse"}) + return relay_state is not None and saml_response is not None + + @staticmethod + def _tfa_required(soup: BeautifulSoup) -> bool: + return soup.find(id="fudiscr-form") is not None + + +async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: + async with session.post(url, data=data) as response: + return soupify(await response.read()) From 8d37f42ce82f1b23acf8d828230ec08b2a22c96e Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 21:26:06 +0200 Subject: [PATCH 2/5] implemented SimpleSAMLLogin option into the IliasWebCrawler --- PFERD/crawl/ilias/ilias_web_crawler.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index 12d8700..d8e96a3 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -32,6 +32,7 @@ from .kit_ilias_html import ( parse_ilias_forum_export, ) from .shibboleth_login import ShibbolethLogin +from .simplesaml_login import SimpleSAMLLogin TargetType = str | int @@ -49,12 +50,14 @@ class IliasWebCrawlerSection(HttpCrawlerSection): return base_url - def login(self) -> Literal["shibboleth"] | LoginTypeLocal: + def login(self) -> Literal["shibboleth", "simple-saml"] | LoginTypeLocal: login_type = self.s.get("login_type") if not login_type: self.missing_value("login_type") if login_type == 
"shibboleth": return "shibboleth" + if login_type == "simple-saml": + return "simple-saml" if login_type == "local": client_id = self.s.get("client_id") if not client_id: @@ -194,7 +197,14 @@ instance's greatest bottleneck. if isinstance(self._login_type, LoginTypeLocal): self._client_id = self._login_type.client_id else: - self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) + # Allow multiple remote login backends + if self._login_type == "shibboleth": + self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) + elif self._login_type == "simple-saml": + self._simplesaml_login = SimpleSAMLLogin(self._base_url, self._auth, self._tfa_auth) + else: + # Fallback to shibboleth to avoid breaking older configs + self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) self._target = section.target() self._link_file_redirect_delay = section.link_redirect_delay() @@ -1044,6 +1054,8 @@ instance's greatest bottleneck. 
# fill the session with the correct cookies if self._login_type == "shibboleth": await self._shibboleth_login.login(self.session) + elif self._login_type == "simple-saml": + await self._simplesaml_login.login(self.session) else: params = { "client_id": self._client_id, From afbfea6ba5e1000736473b1a8c12e96fa7be477a Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 21:46:45 +0200 Subject: [PATCH 3/5] fixed and tested TFA (OTP) for FAU --- PFERD/crawl/ilias/simplesaml_login.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py index d6a629b..7357efc 100644 --- a/PFERD/crawl/ilias/simplesaml_login.py +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -96,10 +96,12 @@ class SimpleSAMLLogin: form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) - url = str(saml_url.origin()) + action - data = { - "_eventId_proceed": "", - "fudis_otp_input": tfa_token, + if action.startswith("https"): # FAU uses full URL here + url = action + else: + url = str(saml_url.origin()) + action #KIT uses relative URL here + data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... 
+ "otp": tfa_token } if csrf_token_input := form.find("input", {"name": "csrf_token"}): data["csrf_token"] = csrf_token_input["value"] # type: ignore @@ -113,7 +115,9 @@ class SimpleSAMLLogin: @staticmethod def _tfa_required(soup: BeautifulSoup) -> bool: - return soup.find(id="fudiscr-form") is not None + # Also treat a body with id="mfa:otp" as TFA required (for FAU) + body = soup.find("body") + return body is not None and body.get("id") == "mfa:otp" async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: From 2509711d62fdfed336dc948537b3344b55a111ca Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 21:59:51 +0200 Subject: [PATCH 4/5] checked and formatted --- PFERD/crawl/ilias/simplesaml_login.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py index 7357efc..85c137f 100644 --- a/PFERD/crawl/ilias/simplesaml_login.py +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -14,7 +14,7 @@ class SimpleSAMLLogin: """ Login via a SimpleSAML system. - It performs a basic authentication by following the login redirect + It performs a basic authentication by following the login redirect and posting credentials to the indicated form. It also supports TFA similar to Shibboleth. 
""" @@ -36,7 +36,7 @@ class SimpleSAMLLogin: saml_url = response.url # If the redirect stayed on the ILIAS host, assume we're already logged in if str(saml_url).startswith(self._ilias_url): - log.explain("ILIAS recognized our simple-saml token and logged us in in the background, returning") + log.explain("ILIAS recognized our SAML token and logged us in in the background, returning") return soup: BeautifulSoup = soupify(await response.read()) @@ -45,10 +45,8 @@ class SimpleSAMLLogin: while not self._login_successful(soup): form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) - if action.startswith("https"): # FAU uses full URL here - url = action - else: - url = str(saml_url.origin()) + action #KIT uses relative URL here + # dynamically determine full URL from action (FAU uses full URL here, KIT uses relative URL) + url = action if action.startswith("https") else str(saml_url.origin()) + action username, password = await self._auth.credentials() data = { @@ -63,7 +61,7 @@ class SimpleSAMLLogin: # Detect attribute release prompt if soup.find(id="attributeRelease"): raise CrawlError( - "ILIAS SimpleSAML entitlements changed! Please log in once in your browser and review them" + "ILIAS SAML entitlements changed! Please log in once in your browser and review them" ) if self._tfa_required(soup): @@ -95,12 +93,10 @@ class SimpleSAMLLogin: # credentials rather than after asking. form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) + # dynamically determine full URL from action (FAU uses full URL here, KIT uses relative URL) + url = action if action.startswith("https") else str(saml_url.origin()) + action - if action.startswith("https"): # FAU uses full URL here - url = action - else: - url = str(saml_url.origin()) + action #KIT uses relative URL here - data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... + data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... 
"otp": tfa_token } if csrf_token_input := form.find("input", {"name": "csrf_token"}): From b10b75d56b92a9d4c8ca1ea7785c2128dfaa0c59 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 22:18:31 +0200 Subject: [PATCH 5/5] added simple-saml to documentation, added name to LICENSE --- CONFIG.md | 4 +++- LICENSE | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 4bf082f..05f7f9c 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -172,6 +172,7 @@ out of the box for the corresponding universities: | Uni Stuttgart | https://ilias3.uni-stuttgart.de | local | Uni_Stuttgart | | Uni Tübingen | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth | | | KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu | shibboleth | pilot | +| FAU StudOn | https://www.studon.fau.de/studon | simple-saml | StudOn | If your university isn't listed, try navigating to your instance's login page. Assuming no custom login service is used, the URL will look something like this: @@ -186,8 +187,9 @@ If the values work, feel free to submit a PR and add them to the table above. - `login_type`: How you authenticate. (Required) - `local`: Use `client_id` for authentication. - `shibboleth`: Use shibboleth for authentication. + - `simple-saml`: Use SimpleSAML based authentication. - `client_id`: An ID used for authentication if `login_type` is `local`. Is - ignored if `login_type` is `shibboleth`. + ignored if `login_type` is `shibboleth` or `simple-saml`. - `target`: The ILIAS element to crawl. 
(Required) - `desktop`: Crawl your personal desktop / dashboard - ``: Crawl the course with the given id diff --git a/LICENSE b/LICENSE index ccccbe3..6e965e3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim, thelukasprobst, Toorero, - Mr-Pine, p-fruck, PinieP + Mr-Pine, p-fruck, PinieP, NIKL45 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in