From 4301dda48fd09171ef3476e147ed391e0ff1d3e4 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Sat, 18 Oct 2025 23:43:57 +0200 Subject: [PATCH 01/15] duplicated files for integration of FAU login --- PFERD/cli/__init__.py | 1 + PFERD/cli/command_fau_ilias_web.py | 37 ++++++ PFERD/crawl/__init__.py | 4 +- PFERD/crawl/ilias/__init__.py | 3 + PFERD/crawl/ilias/fau_ilias_web_crawler.py | 35 ++++++ PFERD/crawl/ilias/fau_shibboleth_login.py | 136 +++++++++++++++++++++ 6 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 PFERD/cli/command_fau_ilias_web.py create mode 100644 PFERD/crawl/ilias/fau_ilias_web_crawler.py create mode 100644 PFERD/crawl/ilias/fau_shibboleth_login.py diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py index c89f6f4..0be8c33 100644 --- a/PFERD/cli/__init__.py +++ b/PFERD/cli/__init__.py @@ -10,5 +10,6 @@ from . import command_local # noqa: F401 imported but unused from . import command_ilias_web # noqa: F401 imported but unused from . import command_kit_ilias_web # noqa: F401 imported but unused +from . import command_fau_ilias_web # noqa: F401 imported but unused from . 
import command_kit_ipd # noqa: F401 imported but unused from .parser import PARSER, ParserLoadError, load_default_section # noqa: F401 imported but unused diff --git a/PFERD/cli/command_fau_ilias_web.py b/PFERD/cli/command_fau_ilias_web.py new file mode 100644 index 0000000..7688783 --- /dev/null +++ b/PFERD/cli/command_fau_ilias_web.py @@ -0,0 +1,37 @@ +import argparse +import configparser + +from ..logging import log +from .common_ilias_args import configure_common_group_args, load_common +from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler + +COMMAND_NAME = "fau-ilias-web" + +SUBPARSER = SUBPARSERS.add_parser( + COMMAND_NAME, + parents=[CRAWLER_PARSER], +) + +GROUP = SUBPARSER.add_argument_group( + title=f"{COMMAND_NAME} crawler arguments", + description=f"arguments for the '{COMMAND_NAME}' crawler", +) + +configure_common_group_args(GROUP) + + +def load( + args: argparse.Namespace, + parser: configparser.ConfigParser, +) -> None: + log.explain(f"Creating config for command '{COMMAND_NAME}'") + + parser["crawl:ilias"] = {} + section = parser["crawl:ilias"] + load_crawler(args, section) + + section["type"] = COMMAND_NAME + load_common(section, args, parser) + + +SUBPARSER.set_defaults(command=load) diff --git a/PFERD/crawl/__init__.py b/PFERD/crawl/__init__.py index 9a0e080..22028de 100644 --- a/PFERD/crawl/__init__.py +++ b/PFERD/crawl/__init__.py @@ -4,7 +4,7 @@ from typing import Callable, Dict from ..auth import Authenticator from ..config import Config from .crawler import Crawler, CrawlError, CrawlerSection # noqa: F401 -from .ilias import IliasWebCrawler, IliasWebCrawlerSection, KitIliasWebCrawler, KitIliasWebCrawlerSection +from .ilias import IliasWebCrawler, IliasWebCrawlerSection, KitIliasWebCrawler, KitIliasWebCrawlerSection, FauIliasWebCrawler, FauIliasWebCrawlerSection from .kit_ipd_crawler import KitIpdCrawler, KitIpdCrawlerSection from .local_crawler import LocalCrawler, LocalCrawlerSection @@ -22,6 +22,8 @@ CRAWLERS: Dict[str, 
CrawlerConstructor] = { IliasWebCrawler(n, IliasWebCrawlerSection(s), c, a), "kit-ilias-web": lambda n, s, c, a: KitIliasWebCrawler(n, KitIliasWebCrawlerSection(s), c, a), + "fau-ilias-web": lambda n, s, c, a: + FauIliasWebCrawler(n, FauIliasWebCrawlerSection(s), c, a), "kit-ipd": lambda n, s, c, a: KitIpdCrawler(n, KitIpdCrawlerSection(s), c), } diff --git a/PFERD/crawl/ilias/__init__.py b/PFERD/crawl/ilias/__init__.py index 287bd3d..9f997e5 100644 --- a/PFERD/crawl/ilias/__init__.py +++ b/PFERD/crawl/ilias/__init__.py @@ -1,9 +1,12 @@ from .kit_ilias_web_crawler import (IliasWebCrawler, IliasWebCrawlerSection, KitIliasWebCrawler, KitIliasWebCrawlerSection) +from .fau_ilias_web_crawler import (FauIliasWebCrawler, FauIliasWebCrawlerSection) __all__ = [ "IliasWebCrawler", "IliasWebCrawlerSection", "KitIliasWebCrawler", "KitIliasWebCrawlerSection", + "FauIliasWebCrawler", + "FauIliasWebCrawlerSection", ] diff --git a/PFERD/crawl/ilias/fau_ilias_web_crawler.py b/PFERD/crawl/ilias/fau_ilias_web_crawler.py new file mode 100644 index 0000000..f26b2d1 --- /dev/null +++ b/PFERD/crawl/ilias/fau_ilias_web_crawler.py @@ -0,0 +1,35 @@ +from typing import Dict, Literal + +from ...auth import Authenticator +from ...config import Config +from .ilias_web_crawler import IliasWebCrawler, IliasWebCrawlerSection +from .fau_shibboleth_login import FauShibbolethLogin + +_ILIAS_URL = "https://www.studon.fau.de/studon" + +class KitShibbolethBackgroundLoginSuccessful: + pass + +class FauIliasWebCrawlerSection(IliasWebCrawlerSection): + def base_url(self) -> str: + return _ILIAS_URL + + def login(self) -> Literal["shibboleth"]: + return "shibboleth" + + +class FauIliasWebCrawler(IliasWebCrawler): + def __init__( + self, + name: str, + section: FauIliasWebCrawlerSection, + config: Config, + authenticators: Dict[str, Authenticator], + ): + super().__init__(name, section, config, authenticators) + + self._shibboleth_login = FauShibbolethLogin( + _ILIAS_URL, + self._auth, + 
section.tfa_auth(authenticators), + ) diff --git a/PFERD/crawl/ilias/fau_shibboleth_login.py b/PFERD/crawl/ilias/fau_shibboleth_login.py new file mode 100644 index 0000000..50a54a7 --- /dev/null +++ b/PFERD/crawl/ilias/fau_shibboleth_login.py @@ -0,0 +1,136 @@ +""" +FAU-specific Shibboleth login helper. + +This module duplicates the original KIT-targeted Shibboleth login implementation +but exposes the same API so it can be swapped in where FAU-specific tweaks are +required. Keep behaviour identical to the original unless changes are needed. +""" +from typing import Any, Optional, cast + +import aiohttp +import yarl +from bs4 import BeautifulSoup, Tag + +from ...auth import Authenticator, TfaAuthenticator +from ...logging import log +from ...utils import soupify +from ..crawler import CrawlError + + +class FauShibbolethLogin: + """ + Login via shibboleth system for FAU. + """ + + def __init__( + self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator] + ) -> None: + self._ilias_url = ilias_url + self._auth = authenticator + self._tfa_auth = tfa_authenticator + + async def login(self, sess: aiohttp.ClientSession) -> None: + """ + Performs the ILIAS Shibboleth authentication dance and saves the login + cookies it receieves. + + This function should only be called whenever it is detected that you're + not logged in. The cookies obtained should be good for a few minutes, + maybe even an hour or two. 
+ """ + + # Equivalent: Click on "Bei StudOn via Single Sign-On anmelden" button in + # https://www.studon.fau.de/studon/login.php + url = f"{self._ilias_url}/saml.php" + async with sess.get(url) as response: + shib_url = response.url + if str(shib_url).startswith(self._ilias_url): + log.explain( + "ILIAS recognized our shib token and logged us in in the background, returning" + ) + return + soup: BeautifulSoup = soupify(await response.read()) + + # Attempt to login using credentials, if necessary + while not self._login_successful(soup): + # Searching the form here so that this fails before asking for + # credentials rather than after asking. + form = cast(Tag, soup.find("form", {"method": "post"})) + action = cast(str, form["action"]) + + # Equivalent: Enter credentials in + # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO + url = str(shib_url.origin()) + action + username, password = await self._auth.credentials() + data = { + "_eventId_proceed": "", + "j_username": username, + "j_password": password, + "fudis_web_authn_assertion_input": "", + } + if csrf_token_input := form.find("input", {"name": "csrf_token"}): + data["csrf_token"] = csrf_token_input["value"] # type: ignore + soup = await _post(sess, url, data) + + if soup.find(id="attributeRelease"): + raise CrawlError( + "ILIAS Shibboleth entitlements changed! 
" + "Please log in once in your browser and review them" + ) + + if self._tfa_required(soup): + soup = await self._authenticate_tfa(sess, soup, shib_url) + + if not self._login_successful(soup): + self._auth.invalidate_credentials() + + # Equivalent: Being redirected via JS automatically + # (or clicking "Continue" if you have JS disabled) + relay_state = cast(Tag, soup.find("input", {"name": "RelayState"})) + saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"})) + url = form = soup.find("form", {"method": "post"})["action"] # type: ignore + data = { # using the info obtained in the while loop above + "RelayState": cast(str, relay_state["value"]), + "SAMLResponse": cast(str, saml_response["value"]), + } + await sess.post(cast(str, url), data=data) + + async def _authenticate_tfa( + self, session: aiohttp.ClientSession, soup: BeautifulSoup, shib_url: yarl.URL + ) -> BeautifulSoup: + if not self._tfa_auth: + self._tfa_auth = TfaAuthenticator("ilias-anon-tfa") + + tfa_token = await self._tfa_auth.password() + + # Searching the form here so that this fails before asking for + # credentials rather than after asking. 
+ form = cast(Tag, soup.find("form", {"method": "post"})) + action = cast(str, form["action"]) + + # Equivalent: Enter token in + # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO + url = str(shib_url.origin()) + action + username, password = await self._auth.credentials() + data = { + "_eventId_proceed": "", + "fudis_otp_input": tfa_token, + } + if csrf_token_input := form.find("input", {"name": "csrf_token"}): + data["csrf_token"] = csrf_token_input["value"] # type: ignore + return await _post(session, url, data) + + @staticmethod + def _login_successful(soup: BeautifulSoup) -> bool: + relay_state = soup.find("input", {"name": "RelayState"}) + saml_response = soup.find("input", {"name": "SAMLResponse"}) + return relay_state is not None and saml_response is not None + + @staticmethod + def _tfa_required(soup: BeautifulSoup) -> bool: + return soup.find(id="fudiscr-form") is not None + + +async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: + async with session.post(url, data=data) as response: + return soupify(await response.read()) From e9eebe0fa9ac12505915a692836f19d258832027 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Sat, 18 Oct 2025 23:44:11 +0200 Subject: [PATCH 02/15] gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 36ab590..4a52339 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ __pycache__/ /pferd.spec /build/ /dist/ +pferd.conf From 2c22794c6994ea36c81729f7d9d90a406440eaf6 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Sun, 19 Oct 2025 00:18:53 +0200 Subject: [PATCH 03/15] removed TFA shibboleth because can't be tested for FAU by me --- PFERD/crawl/ilias/fau_shibboleth_login.py | 30 +++-------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/PFERD/crawl/ilias/fau_shibboleth_login.py b/PFERD/crawl/ilias/fau_shibboleth_login.py index 50a54a7..a7cd96e 100644 --- a/PFERD/crawl/ilias/fau_shibboleth_login.py +++ 
b/PFERD/crawl/ilias/fau_shibboleth_login.py @@ -11,7 +11,7 @@ import aiohttp import yarl from bs4 import BeautifulSoup, Tag -from ...auth import Authenticator, TfaAuthenticator +from ...auth import Authenticator from ...logging import log from ...utils import soupify from ..crawler import CrawlError @@ -79,7 +79,9 @@ class FauShibbolethLogin: ) if self._tfa_required(soup): - soup = await self._authenticate_tfa(sess, soup, shib_url) + raise CrawlError( + "Two-factor authentication is not yet supported for FAU Shibboleth login!" + ) if not self._login_successful(soup): self._auth.invalidate_credentials() @@ -95,30 +97,6 @@ class FauShibbolethLogin: } await sess.post(cast(str, url), data=data) - async def _authenticate_tfa( - self, session: aiohttp.ClientSession, soup: BeautifulSoup, shib_url: yarl.URL - ) -> BeautifulSoup: - if not self._tfa_auth: - self._tfa_auth = TfaAuthenticator("ilias-anon-tfa") - - tfa_token = await self._tfa_auth.password() - - # Searching the form here so that this fails before asking for - # credentials rather than after asking. 
- form = cast(Tag, soup.find("form", {"method": "post"})) - action = cast(str, form["action"]) - - # Equivalent: Enter token in - # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO - url = str(shib_url.origin()) + action - username, password = await self._auth.credentials() - data = { - "_eventId_proceed": "", - "fudis_otp_input": tfa_token, - } - if csrf_token_input := form.find("input", {"name": "csrf_token"}): - data["csrf_token"] = csrf_token_input["value"] # type: ignore - return await _post(session, url, data) @staticmethod def _login_successful(soup: BeautifulSoup) -> bool: From ac7bf13656e5cbbef3b175bc3432ccf40bf43dd3 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Sun, 19 Oct 2025 00:21:53 +0200 Subject: [PATCH 04/15] adapted shibboleth login for FAU (slightly different POST url and payload) --- PFERD/crawl/ilias/fau_shibboleth_login.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/PFERD/crawl/ilias/fau_shibboleth_login.py b/PFERD/crawl/ilias/fau_shibboleth_login.py index a7cd96e..892bf71 100644 --- a/PFERD/crawl/ilias/fau_shibboleth_login.py +++ b/PFERD/crawl/ilias/fau_shibboleth_login.py @@ -60,16 +60,15 @@ class FauShibbolethLogin: # Equivalent: Enter credentials in # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO - url = str(shib_url.origin()) + action + if action.startswith("https"): # FAU uses full URL here + url = action + else: + url = str(shib_url.origin()) + action #KIT uses relative URL here username, password = await self._auth.credentials() data = { - "_eventId_proceed": "", - "j_username": username, - "j_password": password, - "fudis_web_authn_assertion_input": "", + "username": username, + "password": password } - if csrf_token_input := form.find("input", {"name": "csrf_token"}): - data["csrf_token"] = csrf_token_input["value"] # type: ignore soup = await _post(sess, url, data) if soup.find(id="attributeRelease"): @@ -97,7 +96,7 @@ class FauShibbolethLogin: } await sess.post(cast(str, url), 
data=data) - + @staticmethod def _login_successful(soup: BeautifulSoup) -> bool: relay_state = soup.find("input", {"name": "RelayState"}) From 77c6e7d8168b9530e6fe88b9ca908ef1dc9e301c Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 21:25:07 +0200 Subject: [PATCH 05/15] created class SimpleSAMLLogin by duplicating the class ShibbolethLogin and making the two changes mentioned in Garmelon/PFERD/issues/126. TFA not tested yet. --- PFERD/crawl/ilias/simplesaml_login.py | 121 ++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 PFERD/crawl/ilias/simplesaml_login.py diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py new file mode 100644 index 0000000..d6a629b --- /dev/null +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -0,0 +1,121 @@ +from typing import Any, Optional, cast + +import aiohttp +import yarl +from bs4 import BeautifulSoup, Tag + +from ...auth import Authenticator, TfaAuthenticator +from ...logging import log +from ...utils import soupify +from ..crawler import CrawlError + + +class SimpleSAMLLogin: + """ + Login via a SimpleSAML system. + + It performs a basic authentication by following the login redirect + and posting credentials to the indicated form. It also supports TFA similar to Shibboleth. + """ + + def __init__( + self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator] + ) -> None: + self._ilias_url = ilias_url + self._auth = authenticator + self._tfa_auth = tfa_authenticator + + async def login(self, sess: aiohttp.ClientSession) -> None: + """ + Perform a SimpleSAML login flow and populate the session cookies. 
+ """ + + # Start at the local login entrypoint which may redirect to SimpleSAML + url = f"{self._ilias_url}/saml.php" + async with sess.get(url) as response: + saml_url = response.url + # If the redirect stayed on the ILIAS host, assume we're already logged in + if str(saml_url).startswith(self._ilias_url): + log.explain("ILIAS recognized our simple-saml token and logged us in in the background, returning") + return + soup: BeautifulSoup = soupify(await response.read()) + + # The SimpleSAML login page uses a form POST similar to Shibboleth. + # Attempt to login using credentials. + while not self._login_successful(soup): + form = cast(Tag, soup.find("form", {"method": "post"})) + action = cast(str, form["action"]) + if action.startswith("https"): # FAU uses full URL here + url = action + else: + url = str(saml_url.origin()) + action #KIT uses relative URL here + + username, password = await self._auth.credentials() + data = { + "username": username, + "password": password, + } + if csrf_token_input := form.find("input", {"name": "csrf_token"}): + data["csrf_token"] = csrf_token_input["value"] # type: ignore + + soup = await _post(sess, url, data) + + # Detect attribute release prompt + if soup.find(id="attributeRelease"): + raise CrawlError( + "ILIAS SimpleSAML entitlements changed! 
Please log in once in your browser and review them" + ) + + if self._tfa_required(soup): + soup = await self._authenticate_tfa(sess, soup, saml_url) + + if not self._login_successful(soup): + self._auth.invalidate_credentials() + + # Equivalent: Being redirected via JS automatically + # (or clicking "Continue" if you have JS disabled) + relay_state = cast(Tag, soup.find("input", {"name": "RelayState"})) + saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"})) + url = cast(str, cast(Tag, soup.find("form", {"method": "post"}))["action"]) + data = { # using the info obtained in the while loop above + "RelayState": cast(str, relay_state["value"]), + "SAMLResponse": cast(str, saml_response["value"]), + } + await sess.post(cast(str, url), data=data) + + async def _authenticate_tfa( + self, session: aiohttp.ClientSession, soup: BeautifulSoup, saml_url: yarl.URL + ) -> BeautifulSoup: + if not self._tfa_auth: + self._tfa_auth = TfaAuthenticator("ilias-anon-tfa") + + tfa_token = await self._tfa_auth.password() + + # Searching the form here so that this fails before asking for + # credentials rather than after asking. 
+ form = cast(Tag, soup.find("form", {"method": "post"})) + action = cast(str, form["action"]) + + url = str(saml_url.origin()) + action + data = { + "_eventId_proceed": "", + "fudis_otp_input": tfa_token, + } + if csrf_token_input := form.find("input", {"name": "csrf_token"}): + data["csrf_token"] = csrf_token_input["value"] # type: ignore + return await _post(session, url, data) + + @staticmethod + def _login_successful(soup: BeautifulSoup) -> bool: + relay_state = soup.find("input", {"name": "RelayState"}) + saml_response = soup.find("input", {"name": "SAMLResponse"}) + return relay_state is not None and saml_response is not None + + @staticmethod + def _tfa_required(soup: BeautifulSoup) -> bool: + return soup.find(id="fudiscr-form") is not None + + +async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: + async with session.post(url, data=data) as response: + return soupify(await response.read()) From 8d37f42ce82f1b23acf8d828230ec08b2a22c96e Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 21:26:06 +0200 Subject: [PATCH 06/15] implemented SimpleSAMLLogin option into the IliasWebCrawler --- PFERD/crawl/ilias/ilias_web_crawler.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index 12d8700..d8e96a3 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -32,6 +32,7 @@ from .kit_ilias_html import ( parse_ilias_forum_export, ) from .shibboleth_login import ShibbolethLogin +from .simplesaml_login import SimpleSAMLLogin TargetType = str | int @@ -49,12 +50,14 @@ class IliasWebCrawlerSection(HttpCrawlerSection): return base_url - def login(self) -> Literal["shibboleth"] | LoginTypeLocal: + def login(self) -> Literal["shibboleth", "simple-saml"] | LoginTypeLocal: login_type = self.s.get("login_type") if not login_type: self.missing_value("login_type") if login_type == 
"shibboleth": return "shibboleth" + if login_type == "simple-saml": + return "simple-saml" if login_type == "local": client_id = self.s.get("client_id") if not client_id: @@ -194,7 +197,14 @@ instance's greatest bottleneck. if isinstance(self._login_type, LoginTypeLocal): self._client_id = self._login_type.client_id else: - self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) + # Allow multiple remote login backends + if self._login_type == "shibboleth": + self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) + elif self._login_type == "simple-saml": + self._simplesaml_login = SimpleSAMLLogin(self._base_url, self._auth, self._tfa_auth) + else: + # Fallback to shibboleth to avoid breaking older configs + self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) self._target = section.target() self._link_file_redirect_delay = section.link_redirect_delay() @@ -1044,6 +1054,8 @@ instance's greatest bottleneck. 
# fill the session with the correct cookies if self._login_type == "shibboleth": await self._shibboleth_login.login(self.session) + elif self._login_type == "simple-saml": + await self._simplesaml_login.login(self.session) else: params = { "client_id": self._client_id, From afbfea6ba5e1000736473b1a8c12e96fa7be477a Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 21:46:45 +0200 Subject: [PATCH 07/15] fixed and tested TFA (OTP) for FAU --- PFERD/crawl/ilias/simplesaml_login.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py index d6a629b..7357efc 100644 --- a/PFERD/crawl/ilias/simplesaml_login.py +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -96,10 +96,12 @@ class SimpleSAMLLogin: form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) - url = str(saml_url.origin()) + action - data = { - "_eventId_proceed": "", - "fudis_otp_input": tfa_token, + if action.startswith("https"): # FAU uses full URL here + url = action + else: + url = str(saml_url.origin()) + action #KIT uses relative URL here + data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... 
+ "otp": tfa_token } if csrf_token_input := form.find("input", {"name": "csrf_token"}): data["csrf_token"] = csrf_token_input["value"] # type: ignore @@ -113,7 +115,9 @@ class SimpleSAMLLogin: @staticmethod def _tfa_required(soup: BeautifulSoup) -> bool: - return soup.find(id="fudiscr-form") is not None + # Also treat a body with id="mfa:otp" as TFA required (for FAU) + body = soup.find("body") + return body is not None and body.get("id") == "mfa:otp" async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: From 2509711d62fdfed336dc948537b3344b55a111ca Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 21:59:51 +0200 Subject: [PATCH 08/15] checked and formatted --- PFERD/crawl/ilias/simplesaml_login.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py index 7357efc..85c137f 100644 --- a/PFERD/crawl/ilias/simplesaml_login.py +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -14,7 +14,7 @@ class SimpleSAMLLogin: """ Login via a SimpleSAML system. - It performs a basic authentication by following the login redirect + It performs a basic authentication by following the login redirect and posting credentials to the indicated form. It also supports TFA similar to Shibboleth. 
""" @@ -36,7 +36,7 @@ class SimpleSAMLLogin: saml_url = response.url # If the redirect stayed on the ILIAS host, assume we're already logged in if str(saml_url).startswith(self._ilias_url): - log.explain("ILIAS recognized our simple-saml token and logged us in in the background, returning") + log.explain("ILIAS recognized our SAML token and logged us in in the background, returning") return soup: BeautifulSoup = soupify(await response.read()) @@ -45,10 +45,8 @@ class SimpleSAMLLogin: while not self._login_successful(soup): form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) - if action.startswith("https"): # FAU uses full URL here - url = action - else: - url = str(saml_url.origin()) + action #KIT uses relative URL here + # dynamically determine full URL from action (FAU uses full URL here, KIT uses relative URL) + url = action if action.startswith("https") else str(saml_url.origin()) + action username, password = await self._auth.credentials() data = { @@ -63,7 +61,7 @@ class SimpleSAMLLogin: # Detect attribute release prompt if soup.find(id="attributeRelease"): raise CrawlError( - "ILIAS SimpleSAML entitlements changed! Please log in once in your browser and review them" + "ILIAS SAML entitlements changed! Please log in once in your browser and review them" ) if self._tfa_required(soup): @@ -95,12 +93,10 @@ class SimpleSAMLLogin: # credentials rather than after asking. form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) + # dynamically determine full URL from action (FAU uses full URL here, KIT uses relative URL) + url = action if action.startswith("https") else str(saml_url.origin()) + action - if action.startswith("https"): # FAU uses full URL here - url = action - else: - url = str(saml_url.origin()) + action #KIT uses relative URL here - data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... + data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... 
"otp": tfa_token } if csrf_token_input := form.find("input", {"name": "csrf_token"}): From b10b75d56b92a9d4c8ca1ea7785c2128dfaa0c59 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Fri, 24 Oct 2025 22:18:31 +0200 Subject: [PATCH 09/15] added simple-saml to documentation, added name to LICENSE --- CONFIG.md | 4 +++- LICENSE | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 4bf082f..05f7f9c 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -172,6 +172,7 @@ out of the box for the corresponding universities: | Uni Stuttgart | https://ilias3.uni-stuttgart.de | local | Uni_Stuttgart | | Uni Tübingen | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth | | | KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu | shibboleth | pilot | +| FAU StudOn | https://www.studon.fau.de/studon | simple-saml | StudOn | If your university isn't listed, try navigating to your instance's login page. Assuming no custom login service is used, the URL will look something like this: @@ -186,8 +187,9 @@ If the values work, feel free to submit a PR and add them to the table above. - `login_type`: How you authenticate. (Required) - `local`: Use `client_id` for authentication. - `shibboleth`: Use shibboleth for authentication. + - `simple-saml`: Use SimpleSAML based authentication. - `client_id`: An ID used for authentication if `login_type` is `local`. Is - ignored if `login_type` is `shibboleth`. + ignored if `login_type` is `shibboleth` or `simple-saml`. - `target`: The ILIAS element to crawl. 
(Required) - `desktop`: Crawl your personal desktop / dashboard - ``: Crawl the course with the given id diff --git a/LICENSE b/LICENSE index ccccbe3..6e965e3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim, thelukasprobst, Toorero, - Mr-Pine, p-fruck, PinieP + Mr-Pine, p-fruck, PinieP, NIKL45 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in From c21a9f080d9aee48c9b157639fef05fe74b2de79 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Mon, 3 Nov 2025 21:24:27 +0100 Subject: [PATCH 10/15] Revert to commit 207af51aa49d021d2ea4fd774044a0772a103a08. Discard testing in master branch (moved to branch "quickNdirty-SAML"). --- .gitignore | 1 - PFERD/cli/__init__.py | 1 - PFERD/cli/command_fau_ilias_web.py | 37 ------- PFERD/crawl/__init__.py | 4 +- PFERD/crawl/ilias/__init__.py | 3 - PFERD/crawl/ilias/fau_ilias_web_crawler.py | 35 ------- PFERD/crawl/ilias/fau_shibboleth_login.py | 113 --------------------- 7 files changed, 1 insertion(+), 193 deletions(-) delete mode 100644 PFERD/cli/command_fau_ilias_web.py delete mode 100644 PFERD/crawl/ilias/fau_ilias_web_crawler.py delete mode 100644 PFERD/crawl/ilias/fau_shibboleth_login.py diff --git a/.gitignore b/.gitignore index 4a52339..36ab590 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,3 @@ __pycache__/ /pferd.spec /build/ /dist/ -pferd.conf diff --git a/PFERD/cli/__init__.py b/PFERD/cli/__init__.py index 0be8c33..c89f6f4 100644 --- a/PFERD/cli/__init__.py +++ b/PFERD/cli/__init__.py @@ -10,6 +10,5 @@ from . import command_local # noqa: F401 imported but unused from . import command_ilias_web # noqa: F401 imported but unused from . import command_kit_ilias_web # noqa: F401 imported but unused -from . import command_fau_ilias_web # noqa: F401 imported but unused from . 
import command_kit_ipd # noqa: F401 imported but unused from .parser import PARSER, ParserLoadError, load_default_section # noqa: F401 imported but unused diff --git a/PFERD/cli/command_fau_ilias_web.py b/PFERD/cli/command_fau_ilias_web.py deleted file mode 100644 index 7688783..0000000 --- a/PFERD/cli/command_fau_ilias_web.py +++ /dev/null @@ -1,37 +0,0 @@ -import argparse -import configparser - -from ..logging import log -from .common_ilias_args import configure_common_group_args, load_common -from .parser import CRAWLER_PARSER, SUBPARSERS, load_crawler - -COMMAND_NAME = "fau-ilias-web" - -SUBPARSER = SUBPARSERS.add_parser( - COMMAND_NAME, - parents=[CRAWLER_PARSER], -) - -GROUP = SUBPARSER.add_argument_group( - title=f"{COMMAND_NAME} crawler arguments", - description=f"arguments for the '{COMMAND_NAME}' crawler", -) - -configure_common_group_args(GROUP) - - -def load( - args: argparse.Namespace, - parser: configparser.ConfigParser, -) -> None: - log.explain(f"Creating config for command '{COMMAND_NAME}'") - - parser["crawl:ilias"] = {} - section = parser["crawl:ilias"] - load_crawler(args, section) - - section["type"] = COMMAND_NAME - load_common(section, args, parser) - - -SUBPARSER.set_defaults(command=load) diff --git a/PFERD/crawl/__init__.py b/PFERD/crawl/__init__.py index 22028de..9a0e080 100644 --- a/PFERD/crawl/__init__.py +++ b/PFERD/crawl/__init__.py @@ -4,7 +4,7 @@ from typing import Callable, Dict from ..auth import Authenticator from ..config import Config from .crawler import Crawler, CrawlError, CrawlerSection # noqa: F401 -from .ilias import IliasWebCrawler, IliasWebCrawlerSection, KitIliasWebCrawler, KitIliasWebCrawlerSection, FauIliasWebCrawler, FauIliasWebCrawlerSection +from .ilias import IliasWebCrawler, IliasWebCrawlerSection, KitIliasWebCrawler, KitIliasWebCrawlerSection from .kit_ipd_crawler import KitIpdCrawler, KitIpdCrawlerSection from .local_crawler import LocalCrawler, LocalCrawlerSection @@ -22,8 +22,6 @@ CRAWLERS: Dict[str, 
CrawlerConstructor] = { IliasWebCrawler(n, IliasWebCrawlerSection(s), c, a), "kit-ilias-web": lambda n, s, c, a: KitIliasWebCrawler(n, KitIliasWebCrawlerSection(s), c, a), - "fau-ilias-web": lambda n, s, c, a: - FauIliasWebCrawler(n, FauIliasWebCrawlerSection(s), c, a), "kit-ipd": lambda n, s, c, a: KitIpdCrawler(n, KitIpdCrawlerSection(s), c), } diff --git a/PFERD/crawl/ilias/__init__.py b/PFERD/crawl/ilias/__init__.py index 9f997e5..287bd3d 100644 --- a/PFERD/crawl/ilias/__init__.py +++ b/PFERD/crawl/ilias/__init__.py @@ -1,12 +1,9 @@ from .kit_ilias_web_crawler import (IliasWebCrawler, IliasWebCrawlerSection, KitIliasWebCrawler, KitIliasWebCrawlerSection) -from .fau_ilias_web_crawler import (FauIliasWebCrawler, FauIliasWebCrawlerSection) __all__ = [ "IliasWebCrawler", "IliasWebCrawlerSection", "KitIliasWebCrawler", "KitIliasWebCrawlerSection", - "FauIliasWebCrawler", - "FauIliasWebCrawlerSection", ] diff --git a/PFERD/crawl/ilias/fau_ilias_web_crawler.py b/PFERD/crawl/ilias/fau_ilias_web_crawler.py deleted file mode 100644 index f26b2d1..0000000 --- a/PFERD/crawl/ilias/fau_ilias_web_crawler.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Dict, Literal - -from ...auth import Authenticator -from ...config import Config -from .ilias_web_crawler import IliasWebCrawler, IliasWebCrawlerSection -from .fau_shibboleth_login import FauShibbolethLogin - -_ILIAS_URL = "https://www.studon.fau.de/studon" - -class KitShibbolethBackgroundLoginSuccessful: - pass - -class FauIliasWebCrawlerSection(IliasWebCrawlerSection): - def base_url(self) -> str: - return _ILIAS_URL - - def login(self) -> Literal["shibboleth"]: - return "shibboleth" - - -class FauIliasWebCrawler(IliasWebCrawler): - def __init__( - self, - name: str, - section: FauIliasWebCrawlerSection, - config: Config, - authenticators: Dict[str, Authenticator], - ): - super().__init__(name, section, config, authenticators) - - self._shibboleth_login = FauShibbolethLogin( - _ILIAS_URL, - self._auth, - 
section.tfa_auth(authenticators), - ) diff --git a/PFERD/crawl/ilias/fau_shibboleth_login.py b/PFERD/crawl/ilias/fau_shibboleth_login.py deleted file mode 100644 index 892bf71..0000000 --- a/PFERD/crawl/ilias/fau_shibboleth_login.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -FAU-specific Shibboleth login helper. - -This module duplicates the original KIT-targeted Shibboleth login implementation -but exposes the same API so it can be swapped in where FAU-specific tweaks are -required. Keep behaviour identical to the original unless changes are needed. -""" -from typing import Any, Optional, cast - -import aiohttp -import yarl -from bs4 import BeautifulSoup, Tag - -from ...auth import Authenticator -from ...logging import log -from ...utils import soupify -from ..crawler import CrawlError - - -class FauShibbolethLogin: - """ - Login via shibboleth system for FAU. - """ - - def __init__( - self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator] - ) -> None: - self._ilias_url = ilias_url - self._auth = authenticator - self._tfa_auth = tfa_authenticator - - async def login(self, sess: aiohttp.ClientSession) -> None: - """ - Performs the ILIAS Shibboleth authentication dance and saves the login - cookies it receieves. - - This function should only be called whenever it is detected that you're - not logged in. The cookies obtained should be good for a few minutes, - maybe even an hour or two. 
- """ - - # Equivalent: Click on "Bei StudOn via Single Sign-On anmelden" button in - # https://www.studon.fau.de/studon/login.php - url = f"{self._ilias_url}/saml.php" - async with sess.get(url) as response: - shib_url = response.url - if str(shib_url).startswith(self._ilias_url): - log.explain( - "ILIAS recognized our shib token and logged us in in the background, returning" - ) - return - soup: BeautifulSoup = soupify(await response.read()) - - # Attempt to login using credentials, if necessary - while not self._login_successful(soup): - # Searching the form here so that this fails before asking for - # credentials rather than after asking. - form = cast(Tag, soup.find("form", {"method": "post"})) - action = cast(str, form["action"]) - - # Equivalent: Enter credentials in - # https://idp.scc.kit.edu/idp/profile/SAML2/Redirect/SSO - if action.startswith("https"): # FAU uses full URL here - url = action - else: - url = str(shib_url.origin()) + action #KIT uses relative URL here - username, password = await self._auth.credentials() - data = { - "username": username, - "password": password - } - soup = await _post(sess, url, data) - - if soup.find(id="attributeRelease"): - raise CrawlError( - "ILIAS Shibboleth entitlements changed! " - "Please log in once in your browser and review them" - ) - - if self._tfa_required(soup): - raise CrawlError( - "Two-factor authentication is not yet supported for FAU Shibboleth login!" 
- ) - - if not self._login_successful(soup): - self._auth.invalidate_credentials() - - # Equivalent: Being redirected via JS automatically - # (or clicking "Continue" if you have JS disabled) - relay_state = cast(Tag, soup.find("input", {"name": "RelayState"})) - saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"})) - url = form = soup.find("form", {"method": "post"})["action"] # type: ignore - data = { # using the info obtained in the while loop above - "RelayState": cast(str, relay_state["value"]), - "SAMLResponse": cast(str, saml_response["value"]), - } - await sess.post(cast(str, url), data=data) - - - @staticmethod - def _login_successful(soup: BeautifulSoup) -> bool: - relay_state = soup.find("input", {"name": "RelayState"}) - saml_response = soup.find("input", {"name": "SAMLResponse"}) - return relay_state is not None and saml_response is not None - - @staticmethod - def _tfa_required(soup: BeautifulSoup) -> bool: - return soup.find(id="fudiscr-form") is not None - - -async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: - async with session.post(url, data=data) as response: - return soupify(await response.read()) From e59a9dbf2dc70696e48514b56ea364a934e6c286 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Mon, 3 Nov 2025 21:28:57 +0100 Subject: [PATCH 11/15] created class SimpleSAMLLogin by duplicating the class ShibbolethLogin and making the two changes mentioned in Garmelon/PFERD/issues/126. TFA not tested yet. 
--- PFERD/crawl/ilias/simplesaml_login.py | 121 ++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 PFERD/crawl/ilias/simplesaml_login.py diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py new file mode 100644 index 0000000..d6a629b --- /dev/null +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -0,0 +1,121 @@ +from typing import Any, Optional, cast + +import aiohttp +import yarl +from bs4 import BeautifulSoup, Tag + +from ...auth import Authenticator, TfaAuthenticator +from ...logging import log +from ...utils import soupify +from ..crawler import CrawlError + + +class SimpleSAMLLogin: + """ + Login via a SimpleSAML system. + + It performs a basic authentication by following the login redirect + and posting credentials to the indicated form. It also supports TFA similar to Shibboleth. + """ + + def __init__( + self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator] + ) -> None: + self._ilias_url = ilias_url + self._auth = authenticator + self._tfa_auth = tfa_authenticator + + async def login(self, sess: aiohttp.ClientSession) -> None: + """ + Perform a SimpleSAML login flow and populate the session cookies. + """ + + # Start at the local login entrypoint which may redirect to SimpleSAML + url = f"{self._ilias_url}/saml.php" + async with sess.get(url) as response: + saml_url = response.url + # If the redirect stayed on the ILIAS host, assume we're already logged in + if str(saml_url).startswith(self._ilias_url): + log.explain("ILIAS recognized our simple-saml token and logged us in in the background, returning") + return + soup: BeautifulSoup = soupify(await response.read()) + + # The SimpleSAML login page uses a form POST similar to Shibboleth. + # Attempt to login using credentials. 
+ while not self._login_successful(soup): + form = cast(Tag, soup.find("form", {"method": "post"})) + action = cast(str, form["action"]) + if action.startswith("https"): # FAU uses full URL here + url = action + else: + url = str(saml_url.origin()) + action #KIT uses relative URL here + + username, password = await self._auth.credentials() + data = { + "username": username, + "password": password, + } + if csrf_token_input := form.find("input", {"name": "csrf_token"}): + data["csrf_token"] = csrf_token_input["value"] # type: ignore + + soup = await _post(sess, url, data) + + # Detect attribute release prompt + if soup.find(id="attributeRelease"): + raise CrawlError( + "ILIAS SimpleSAML entitlements changed! Please log in once in your browser and review them" + ) + + if self._tfa_required(soup): + soup = await self._authenticate_tfa(sess, soup, saml_url) + + if not self._login_successful(soup): + self._auth.invalidate_credentials() + + # Equivalent: Being redirected via JS automatically + # (or clicking "Continue" if you have JS disabled) + relay_state = cast(Tag, soup.find("input", {"name": "RelayState"})) + saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"})) + url = cast(str, cast(Tag, soup.find("form", {"method": "post"}))["action"]) + data = { # using the info obtained in the while loop above + "RelayState": cast(str, relay_state["value"]), + "SAMLResponse": cast(str, saml_response["value"]), + } + await sess.post(cast(str, url), data=data) + + async def _authenticate_tfa( + self, session: aiohttp.ClientSession, soup: BeautifulSoup, saml_url: yarl.URL + ) -> BeautifulSoup: + if not self._tfa_auth: + self._tfa_auth = TfaAuthenticator("ilias-anon-tfa") + + tfa_token = await self._tfa_auth.password() + + # Searching the form here so that this fails before asking for + # credentials rather than after asking. 
+ form = cast(Tag, soup.find("form", {"method": "post"})) + action = cast(str, form["action"]) + + url = str(saml_url.origin()) + action + data = { + "_eventId_proceed": "", + "fudis_otp_input": tfa_token, + } + if csrf_token_input := form.find("input", {"name": "csrf_token"}): + data["csrf_token"] = csrf_token_input["value"] # type: ignore + return await _post(session, url, data) + + @staticmethod + def _login_successful(soup: BeautifulSoup) -> bool: + relay_state = soup.find("input", {"name": "RelayState"}) + saml_response = soup.find("input", {"name": "SAMLResponse"}) + return relay_state is not None and saml_response is not None + + @staticmethod + def _tfa_required(soup: BeautifulSoup) -> bool: + return soup.find(id="fudiscr-form") is not None + + +async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: + async with session.post(url, data=data) as response: + return soupify(await response.read()) From 31d2c86a05b0a48dd47680b28aa87edbafacfbf0 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Mon, 3 Nov 2025 21:28:57 +0100 Subject: [PATCH 12/15] implemented SimpleSAMLLogin option into the IliasWebCrawler --- PFERD/crawl/ilias/ilias_web_crawler.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/PFERD/crawl/ilias/ilias_web_crawler.py b/PFERD/crawl/ilias/ilias_web_crawler.py index 12d8700..d8e96a3 100644 --- a/PFERD/crawl/ilias/ilias_web_crawler.py +++ b/PFERD/crawl/ilias/ilias_web_crawler.py @@ -32,6 +32,7 @@ from .kit_ilias_html import ( parse_ilias_forum_export, ) from .shibboleth_login import ShibbolethLogin +from .simplesaml_login import SimpleSAMLLogin TargetType = str | int @@ -49,12 +50,14 @@ class IliasWebCrawlerSection(HttpCrawlerSection): return base_url - def login(self) -> Literal["shibboleth"] | LoginTypeLocal: + def login(self) -> Literal["shibboleth", "simple-saml"] | LoginTypeLocal: login_type = self.s.get("login_type") if not login_type: self.missing_value("login_type") if login_type == 
"shibboleth": return "shibboleth" + if login_type == "simple-saml": + return "simple-saml" if login_type == "local": client_id = self.s.get("client_id") if not client_id: @@ -194,7 +197,14 @@ instance's greatest bottleneck. if isinstance(self._login_type, LoginTypeLocal): self._client_id = self._login_type.client_id else: - self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) + # Allow multiple remote login backends + if self._login_type == "shibboleth": + self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) + elif self._login_type == "simple-saml": + self._simplesaml_login = SimpleSAMLLogin(self._base_url, self._auth, self._tfa_auth) + else: + # Fallback to shibboleth to avoid breaking older configs + self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth) self._target = section.target() self._link_file_redirect_delay = section.link_redirect_delay() @@ -1044,6 +1054,8 @@ instance's greatest bottleneck. 
# fill the session with the correct cookies if self._login_type == "shibboleth": await self._shibboleth_login.login(self.session) + elif self._login_type == "simple-saml": + await self._simplesaml_login.login(self.session) else: params = { "client_id": self._client_id, From 40715d648b0a6940329f0c4663473452f27be09d Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Mon, 3 Nov 2025 21:28:57 +0100 Subject: [PATCH 13/15] fixed and tested TFA (OTP) for FAU --- PFERD/crawl/ilias/simplesaml_login.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py index d6a629b..7357efc 100644 --- a/PFERD/crawl/ilias/simplesaml_login.py +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -96,10 +96,12 @@ class SimpleSAMLLogin: form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) - url = str(saml_url.origin()) + action - data = { - "_eventId_proceed": "", - "fudis_otp_input": tfa_token, + if action.startswith("https"): # FAU uses full URL here + url = action + else: + url = str(saml_url.origin()) + action #KIT uses relative URL here + data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... 
+ "otp": tfa_token } if csrf_token_input := form.find("input", {"name": "csrf_token"}): data["csrf_token"] = csrf_token_input["value"] # type: ignore @@ -113,7 +115,9 @@ class SimpleSAMLLogin: @staticmethod def _tfa_required(soup: BeautifulSoup) -> bool: - return soup.find(id="fudiscr-form") is not None + # Also treat a body with id="mfa:otp" as TFA required (for FAU) + body = soup.find("body") + return body is not None and body.get("id") == "mfa:otp" async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup: From 31758e7cfb6bd474fde9f9ec271f812428cdf653 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Mon, 3 Nov 2025 21:28:57 +0100 Subject: [PATCH 14/15] checked and formatted --- PFERD/crawl/ilias/simplesaml_login.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/PFERD/crawl/ilias/simplesaml_login.py b/PFERD/crawl/ilias/simplesaml_login.py index 7357efc..85c137f 100644 --- a/PFERD/crawl/ilias/simplesaml_login.py +++ b/PFERD/crawl/ilias/simplesaml_login.py @@ -14,7 +14,7 @@ class SimpleSAMLLogin: """ Login via a SimpleSAML system. - It performs a basic authentication by following the login redirect + It performs a basic authentication by following the login redirect and posting credentials to the indicated form. It also supports TFA similar to Shibboleth. 
""" @@ -36,7 +36,7 @@ class SimpleSAMLLogin: saml_url = response.url # If the redirect stayed on the ILIAS host, assume we're already logged in if str(saml_url).startswith(self._ilias_url): - log.explain("ILIAS recognized our simple-saml token and logged us in in the background, returning") + log.explain("ILIAS recognized our SAML token and logged us in in the background, returning") return soup: BeautifulSoup = soupify(await response.read()) @@ -45,10 +45,8 @@ class SimpleSAMLLogin: while not self._login_successful(soup): form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) - if action.startswith("https"): # FAU uses full URL here - url = action - else: - url = str(saml_url.origin()) + action #KIT uses relative URL here + # dynamically determine full URL from action (FAU uses full URL here, KIT uses relative URL) + url = action if action.startswith("https") else str(saml_url.origin()) + action username, password = await self._auth.credentials() data = { @@ -63,7 +61,7 @@ class SimpleSAMLLogin: # Detect attribute release prompt if soup.find(id="attributeRelease"): raise CrawlError( - "ILIAS SimpleSAML entitlements changed! Please log in once in your browser and review them" + "ILIAS SAML entitlements changed! Please log in once in your browser and review them" ) if self._tfa_required(soup): @@ -95,12 +93,10 @@ class SimpleSAMLLogin: # credentials rather than after asking. form = cast(Tag, soup.find("form", {"method": "post"})) action = cast(str, form["action"]) + # dynamically determine full URL from action (FAU uses full URL here, KIT uses relative URL) + url = action if action.startswith("https") else str(saml_url.origin()) + action - if action.startswith("https"): # FAU uses full URL here - url = action - else: - url = str(saml_url.origin()) + action #KIT uses relative URL here - data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... + data = { # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?... 
"otp": tfa_token } if csrf_token_input := form.find("input", {"name": "csrf_token"}): From 5ba56f71c9c0a6bc2bb980b4edee76c16f42e014 Mon Sep 17 00:00:00 2001 From: NIKL45 Date: Mon, 3 Nov 2025 21:28:57 +0100 Subject: [PATCH 15/15] added simple-saml to documentation, added name to LICENSE --- CONFIG.md | 4 +++- LICENSE | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 4bf082f..05f7f9c 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -172,6 +172,7 @@ out of the box for the corresponding universities: | Uni Stuttgart | https://ilias3.uni-stuttgart.de | local | Uni_Stuttgart | | Uni Tübingen | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth | | | KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu | shibboleth | pilot | +| FAU StudOn | https://www.studon.fau.de/studon | simple-saml | StudOn | If your university isn't listed, try navigating to your instance's login page. Assuming no custom login service is used, the URL will look something like this: @@ -186,8 +187,9 @@ If the values work, feel free to submit a PR and add them to the table above. - `login_type`: How you authenticate. (Required) - `local`: Use `client_id` for authentication. - `shibboleth`: Use shibboleth for authentication. + - `simple-saml`: Use SimpleSAML based authentication. - `client_id`: An ID used for authentication if `login_type` is `local`. Is - ignored if `login_type` is `shibboleth`. + ignored if `login_type` is `shibboleth` or `simple-saml`. - `target`: The ILIAS element to crawl. 
(Required) - `desktop`: Crawl your personal desktop / dashboard - ``: Crawl the course with the given id diff --git a/LICENSE b/LICENSE index ccccbe3..6e965e3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw, TheChristophe, Scriptim, thelukasprobst, Toorero, - Mr-Pine, p-fruck, PinieP + Mr-Pine, p-fruck, PinieP, NIKL45 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in