This commit is contained in:
Niklas 2026-01-21 00:54:46 +00:00 committed by GitHub
commit a0b348785a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 139 additions and 4 deletions

View file

@ -173,6 +173,7 @@ out of the box for the corresponding universities:
| Uni Stuttgart | https://ilias3.uni-stuttgart.de | local | Uni_Stuttgart |
| Uni Tübingen | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth | |
| KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu | shibboleth | pilot |
| FAU StudOn | https://www.studon.fau.de/studon | simple-saml | StudOn |
If your university isn't listed, try navigating to your instance's login page.
Assuming no custom login service is used, the URL will look something like this:
@ -187,8 +188,9 @@ If the values work, feel free to submit a PR and add them to the table above.
- `login_type`: How you authenticate. (Required)
- `local`: Use `client_id` for authentication.
- `shibboleth`: Use shibboleth for authentication.
- `simple-saml`: Use SimpleSAML-based authentication.
- `client_id`: An ID used for authentication if `login_type` is `local`. Is
ignored if `login_type` is `shibboleth`.
ignored if `login_type` is `shibboleth` or `simple-saml`.
- `target`: The ILIAS element to crawl. (Required)
- `desktop`: Crawl your personal desktop / dashboard
- `<course id>`: Crawl the course with the given id

View file

@ -1,6 +1,6 @@
Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
TheChristophe, Scriptim, thelukasprobst, Toorero,
Mr-Pine, p-fruck, PinieP
Mr-Pine, p-fruck, PinieP, NIKL45
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in

View file

@ -31,6 +31,7 @@ from .kit_ilias_html import (
parse_ilias_forum_export,
)
from .shibboleth_login import ShibbolethLogin
from .simplesaml_login import SimpleSAMLLogin
TargetType = str | int
@ -48,12 +49,14 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
return base_url
def login(self) -> Literal["shibboleth"] | LoginTypeLocal:
def login(self) -> Literal["shibboleth", "simple-saml"] | LoginTypeLocal:
login_type = self.s.get("login_type")
if not login_type:
self.missing_value("login_type")
if login_type == "shibboleth":
return "shibboleth"
if login_type == "simple-saml":
return "simple-saml"
if login_type == "local":
client_id = self.s.get("client_id")
if not client_id:
@ -193,7 +196,14 @@ instance's greatest bottleneck.
if isinstance(self._login_type, LoginTypeLocal):
self._client_id = self._login_type.client_id
else:
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
# Allow multiple remote login backends
if self._login_type == "shibboleth":
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
elif self._login_type == "simple-saml":
self._simplesaml_login = SimpleSAMLLogin(self._base_url, self._auth, self._tfa_auth)
else:
# Fallback to shibboleth to avoid breaking older configs
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
self._target = section.target()
self._link_file_redirect_delay = section.link_redirect_delay()
@ -1045,6 +1055,8 @@ instance's greatest bottleneck.
# fill the session with the correct cookies
if self._login_type == "shibboleth":
await self._shibboleth_login.login(self.session)
elif self._login_type == "simple-saml":
await self._simplesaml_login.login(self.session)
else:
params = {
"client_id": self._client_id,

View file

@ -0,0 +1,121 @@
from typing import Any, Optional, cast
import aiohttp
import yarl
from bs4 import BeautifulSoup, Tag
from ...auth import Authenticator, TfaAuthenticator
from ...logging import log
from ...utils import soupify
from ..crawler import CrawlError
class SimpleSAMLLogin:
    """
    Login via a SimpleSAML system.

    It performs a basic authentication by following the login redirect
    and posting credentials to the indicated form. It also supports TFA similar to Shibboleth.
    """

    def __init__(
        self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator]
    ) -> None:
        """
        Store the ILIAS base URL and the authenticators used during login.

        ``tfa_authenticator`` may be ``None``; a ``TfaAuthenticator`` is then created
        lazily the first time a TFA page is encountered.
        """
        self._ilias_url = ilias_url
        self._auth = authenticator
        self._tfa_auth = tfa_authenticator

    async def login(self, sess: aiohttp.ClientSession) -> None:
        """
        Perform a SimpleSAML login flow and populate the session cookies.

        Raises a CrawlError if the identity provider shows an attribute release
        prompt or if a page does not contain the POST form this flow relies on.
        """
        # Start at the local login entrypoint which may redirect to SimpleSAML
        url = f"{self._ilias_url}/saml.php"
        async with sess.get(url) as response:
            saml_url = response.url
            # If the redirect stayed on the ILIAS host, assume we're already logged in
            if str(saml_url).startswith(self._ilias_url):
                log.explain("ILIAS recognized our SAML token and logged us in in the background, returning")
                return
            soup: BeautifulSoup = soupify(await response.read())

        # The SimpleSAML login page uses a form POST similar to Shibboleth.
        # Attempt to login using credentials.
        while not self._login_successful(soup):
            # Locate the form before asking for credentials so that an unexpected
            # page fails early instead of after prompting the user.
            form = self._find_post_form(soup)
            url = self._resolve_action(cast(str, form["action"]), saml_url)

            username, password = await self._auth.credentials()
            data = {
                "username": username,
                "password": password,
            }
            self._add_csrf_token(form, data)
            soup = await _post(sess, url, data)

            # Detect attribute release prompt
            if soup.find(id="attributeRelease"):
                raise CrawlError(
                    "ILIAS SAML entitlements changed! Please log in once in your browser and review them"
                )

            if self._tfa_required(soup):
                soup = await self._authenticate_tfa(sess, soup, saml_url)

            if not self._login_successful(soup):
                self._auth.invalidate_credentials()

        # Equivalent: Being redirected via JS automatically
        # (or clicking "Continue" if you have JS disabled)
        relay_state = cast(Tag, soup.find("input", {"name": "RelayState"}))
        saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"}))
        url = cast(str, self._find_post_form(soup)["action"])
        data = {  # using the info obtained in the while loop above
            "RelayState": cast(str, relay_state["value"]),
            "SAMLResponse": cast(str, saml_response["value"]),
        }
        # Only the cookies set by this request matter. Use the context manager so
        # the response is released instead of keeping its connection checked out.
        async with sess.post(url, data=data):
            pass

    async def _authenticate_tfa(
        self, session: aiohttp.ClientSession, soup: BeautifulSoup, saml_url: yarl.URL
    ) -> BeautifulSoup:
        """
        Submit a one-time password to the TFA form found in ``soup``.

        Returns the parsed page the identity provider answers with.
        Raises a CrawlError if ``soup`` contains no POST form.
        """
        # Searching the form here so that this fails before asking for
        # credentials rather than after asking.
        form = self._find_post_form(soup)
        url = self._resolve_action(cast(str, form["action"]), saml_url)

        if not self._tfa_auth:
            self._tfa_auth = TfaAuthenticator("ilias-anon-tfa")

        tfa_token = await self._tfa_auth.password()

        data = {  # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?...
            "otp": tfa_token
        }
        self._add_csrf_token(form, data)
        return await _post(session, url, data)

    @staticmethod
    def _find_post_form(soup: BeautifulSoup) -> Tag:
        """Return the first POST form in ``soup`` or raise a CrawlError if there is none."""
        form = soup.find("form", {"method": "post"})
        if form is None:
            raise CrawlError("SimpleSAML login failed: the page did not contain the expected POST form")
        return cast(Tag, form)

    @staticmethod
    def _resolve_action(action: str, saml_url: yarl.URL) -> str:
        """
        Turn a form action into a full URL.

        Absolute actions are used as-is, anything else is appended to the origin
        of ``saml_url`` (FAU uses full URL here, KIT uses relative URL).
        """
        if action.startswith(("https://", "http://")):
            return action
        return str(saml_url.origin()) + action

    @staticmethod
    def _add_csrf_token(form: Tag, data: dict[str, str]) -> None:
        """Copy the form's ``csrf_token`` input into ``data`` if the form has one."""
        if csrf_token_input := form.find("input", {"name": "csrf_token"}):
            data["csrf_token"] = csrf_token_input["value"]  # type: ignore

    @staticmethod
    def _login_successful(soup: BeautifulSoup) -> bool:
        """Return True if ``soup`` contains both the RelayState and SAMLResponse inputs."""
        relay_state = soup.find("input", {"name": "RelayState"})
        saml_response = soup.find("input", {"name": "SAMLResponse"})
        return relay_state is not None and saml_response is not None

    @staticmethod
    def _tfa_required(soup: BeautifulSoup) -> bool:
        """Return True if ``soup`` is a TFA prompt."""
        # Also treat a body with id="mfa:otp" as TFA required (for FAU)
        body = soup.find("body")
        return body is not None and body.get("id") == "mfa:otp"
async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup:
    """POST ``data`` to ``url`` and return the response body parsed by ``soupify``."""
    async with session.post(url, data=data) as response:
        body = await response.read()
    return soupify(body)