mirror of
https://github.com/Garmelon/PFERD.git
synced 2026-04-12 15:35:05 +02:00
Merge afe5819388 into e246053de2
This commit is contained in:
commit
a0b348785a
4 changed files with 139 additions and 4 deletions
|
|
@ -173,6 +173,7 @@ out of the box for the corresponding universities:
|
||||||
| Uni Stuttgart | https://ilias3.uni-stuttgart.de | local | Uni_Stuttgart |
|
| Uni Stuttgart | https://ilias3.uni-stuttgart.de | local | Uni_Stuttgart |
|
||||||
| Uni Tübingen | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth | |
|
| Uni Tübingen | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth | |
|
||||||
| KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu | shibboleth | pilot |
|
| KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu | shibboleth | pilot |
|
||||||
|
| FAU StudOn | https://www.studon.fau.de/studon | simple-saml | StudOn |
|
||||||
|
|
||||||
If your university isn't listed, try navigating to your instance's login page.
|
If your university isn't listed, try navigating to your instance's login page.
|
||||||
Assuming no custom login service is used, the URL will look something like this:
|
Assuming no custom login service is used, the URL will look something like this:
|
||||||
|
|
@ -187,8 +188,9 @@ If the values work, feel free to submit a PR and add them to the table above.
|
||||||
- `login_type`: How you authenticate. (Required)
|
- `login_type`: How you authenticate. (Required)
|
||||||
- `local`: Use `client_id` for authentication.
|
- `local`: Use `client_id` for authentication.
|
||||||
- `shibboleth`: Use shibboleth for authentication.
|
- `shibboleth`: Use shibboleth for authentication.
|
||||||
|
- `simple-saml`: Use SimpleSAML based authentication.
|
||||||
- `client_id`: An ID used for authentication if `login_type` is `local`. Is
|
- `client_id`: An ID used for authentication if `login_type` is `local`. Is
|
||||||
ignored if `login_type` is `shibboleth`.
|
ignored if `login_type` is `shibboleth` or `simple-saml`.
|
||||||
- `target`: The ILIAS element to crawl. (Required)
|
- `target`: The ILIAS element to crawl. (Required)
|
||||||
- `desktop`: Crawl your personal desktop / dashboard
|
- `desktop`: Crawl your personal desktop / dashboard
|
||||||
- `<course id>`: Crawl the course with the given id
|
- `<course id>`: Crawl the course with the given id
|
||||||
|
|
|
||||||
2
LICENSE
2
LICENSE
|
|
@ -1,6 +1,6 @@
|
||||||
Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
|
Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
|
||||||
TheChristophe, Scriptim, thelukasprobst, Toorero,
|
TheChristophe, Scriptim, thelukasprobst, Toorero,
|
||||||
Mr-Pine, p-fruck, PinieP
|
Mr-Pine, p-fruck, PinieP, NIKL45
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
this software and associated documentation files (the "Software"), to deal in
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ from .kit_ilias_html import (
|
||||||
parse_ilias_forum_export,
|
parse_ilias_forum_export,
|
||||||
)
|
)
|
||||||
from .shibboleth_login import ShibbolethLogin
|
from .shibboleth_login import ShibbolethLogin
|
||||||
|
from .simplesaml_login import SimpleSAMLLogin
|
||||||
|
|
||||||
TargetType = str | int
|
TargetType = str | int
|
||||||
|
|
||||||
|
|
@ -48,12 +49,14 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
|
||||||
|
|
||||||
return base_url
|
return base_url
|
||||||
|
|
||||||
def login(self) -> Literal["shibboleth"] | LoginTypeLocal:
|
def login(self) -> Literal["shibboleth", "simple-saml"] | LoginTypeLocal:
|
||||||
login_type = self.s.get("login_type")
|
login_type = self.s.get("login_type")
|
||||||
if not login_type:
|
if not login_type:
|
||||||
self.missing_value("login_type")
|
self.missing_value("login_type")
|
||||||
if login_type == "shibboleth":
|
if login_type == "shibboleth":
|
||||||
return "shibboleth"
|
return "shibboleth"
|
||||||
|
if login_type == "simple-saml":
|
||||||
|
return "simple-saml"
|
||||||
if login_type == "local":
|
if login_type == "local":
|
||||||
client_id = self.s.get("client_id")
|
client_id = self.s.get("client_id")
|
||||||
if not client_id:
|
if not client_id:
|
||||||
|
|
@ -193,6 +196,13 @@ instance's greatest bottleneck.
|
||||||
if isinstance(self._login_type, LoginTypeLocal):
|
if isinstance(self._login_type, LoginTypeLocal):
|
||||||
self._client_id = self._login_type.client_id
|
self._client_id = self._login_type.client_id
|
||||||
else:
|
else:
|
||||||
|
# Allow multiple remote login backends
|
||||||
|
if self._login_type == "shibboleth":
|
||||||
|
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
|
||||||
|
elif self._login_type == "simple-saml":
|
||||||
|
self._simplesaml_login = SimpleSAMLLogin(self._base_url, self._auth, self._tfa_auth)
|
||||||
|
else:
|
||||||
|
# Fallback to shibboleth to avoid breaking older configs
|
||||||
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
|
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
|
||||||
|
|
||||||
self._target = section.target()
|
self._target = section.target()
|
||||||
|
|
@ -1045,6 +1055,8 @@ instance's greatest bottleneck.
|
||||||
# fill the session with the correct cookies
|
# fill the session with the correct cookies
|
||||||
if self._login_type == "shibboleth":
|
if self._login_type == "shibboleth":
|
||||||
await self._shibboleth_login.login(self.session)
|
await self._shibboleth_login.login(self.session)
|
||||||
|
elif self._login_type == "simple-saml":
|
||||||
|
await self._simplesaml_login.login(self.session)
|
||||||
else:
|
else:
|
||||||
params = {
|
params = {
|
||||||
"client_id": self._client_id,
|
"client_id": self._client_id,
|
||||||
|
|
|
||||||
121
PFERD/crawl/ilias/simplesaml_login.py
Normal file
121
PFERD/crawl/ilias/simplesaml_login.py
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
from typing import Any, Optional, cast
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import yarl
|
||||||
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
from ...auth import Authenticator, TfaAuthenticator
|
||||||
|
from ...logging import log
|
||||||
|
from ...utils import soupify
|
||||||
|
from ..crawler import CrawlError
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleSAMLLogin:
    """
    Login via a SimpleSAML system.

    The flow starts at the ILIAS ``saml.php`` entrypoint, follows the redirect
    to the identity provider, posts the credentials to the login form found
    there (optionally followed by a TFA token) and finally relays the
    resulting ``RelayState``/``SAMLResponse`` pair to the target given by the
    response form, so that the session ends up with the login cookies.
    """

    def __init__(
        self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator]
    ) -> None:
        """
        Store the ILIAS base URL and the authenticators used during login.

        ``tfa_authenticator`` may be ``None``; in that case a
        ``TfaAuthenticator`` is created on demand when a TFA page shows up.
        """
        self._ilias_url = ilias_url
        self._auth = authenticator
        self._tfa_auth = tfa_authenticator

    async def login(self, sess: aiohttp.ClientSession) -> None:
        """
        Perform a SimpleSAML login flow and populate the session cookies.

        Raises a ``CrawlError`` if an expected form is missing from a page or
        if the identity provider shows an attribute release prompt.
        """

        # Start at the local login entrypoint which may redirect to SimpleSAML
        url = f"{self._ilias_url}/saml.php"
        async with sess.get(url) as response:
            saml_url = response.url
            # If the redirect stayed on the ILIAS host, assume we're already logged in
            if str(saml_url).startswith(self._ilias_url):
                log.explain("ILIAS recognized our SAML token and logged us in in the background, returning")
                return
            soup: BeautifulSoup = soupify(await response.read())

        # The SimpleSAML login page uses a form POST similar to Shibboleth.
        # Keep asking for credentials until the response page carries the
        # RelayState/SAMLResponse pair.
        while not self._login_successful(soup):
            form = self._find_post_form(soup)
            url = self._form_target(saml_url, form)

            username, password = await self._auth.credentials()
            data = {
                "username": username,
                "password": password,
            }
            if csrf_token_input := form.find("input", {"name": "csrf_token"}):
                data["csrf_token"] = csrf_token_input["value"]  # type: ignore

            soup = await _post(sess, url, data)

            # Detect attribute release prompt
            if soup.find(id="attributeRelease"):
                raise CrawlError(
                    "ILIAS SAML entitlements changed! Please log in once in your browser and review them"
                )

            if self._tfa_required(soup):
                soup = await self._authenticate_tfa(sess, soup, saml_url)

            if not self._login_successful(soup):
                self._auth.invalidate_credentials()

        # Equivalent: Being redirected via JS automatically
        # (or clicking "Continue" if you have JS disabled)
        relay_state = cast(Tag, soup.find("input", {"name": "RelayState"}))
        saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"}))
        url = cast(str, self._find_post_form(soup)["action"])
        data = {  # using the info obtained in the while loop above
            "RelayState": cast(str, relay_state["value"]),
            "SAMLResponse": cast(str, saml_response["value"]),
        }
        # The response body is not needed, but the response must still be
        # released so its connection is not held open.
        async with sess.post(url, data=data):
            pass

    async def _authenticate_tfa(
        self, session: aiohttp.ClientSession, soup: BeautifulSoup, saml_url: yarl.URL
    ) -> BeautifulSoup:
        """
        Submit a TFA token to the form on ``soup`` and return the next page.

        Raises a ``CrawlError`` if the page contains no POST form.
        """
        if not self._tfa_auth:
            self._tfa_auth = TfaAuthenticator("ilias-anon-tfa")

        # Searching the form here so that this fails before asking for
        # credentials rather than after asking.
        form = self._find_post_form(soup)
        url = self._form_target(saml_url, form)

        tfa_token = await self._tfa_auth.password()

        data = {  # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?...
            "otp": tfa_token
        }
        if csrf_token_input := form.find("input", {"name": "csrf_token"}):
            data["csrf_token"] = csrf_token_input["value"]  # type: ignore
        return await _post(session, url, data)

    @staticmethod
    def _find_post_form(soup: BeautifulSoup) -> Tag:
        """Return the first POST form on the page or raise a ``CrawlError``."""
        form = soup.find("form", {"method": "post"})
        if not isinstance(form, Tag):
            raise CrawlError("Could not find a login form on the SimpleSAML page")
        return form

    @staticmethod
    def _form_target(saml_url: yarl.URL, form: Tag) -> str:
        """
        Resolve the ``action`` of ``form`` against ``saml_url``.

        FAU uses a full URL here, KIT uses a relative URL. Joining handles
        both: an absolute action is kept as is, a relative one is resolved
        against the page the login started on.
        """
        action = cast(str, form["action"])
        return str(saml_url.join(yarl.URL(action)))

    @staticmethod
    def _login_successful(soup: BeautifulSoup) -> bool:
        """Return whether the page carries both ``RelayState`` and ``SAMLResponse`` inputs."""
        relay_state = soup.find("input", {"name": "RelayState"})
        saml_response = soup.find("input", {"name": "SAMLResponse"})
        return relay_state is not None and saml_response is not None

    @staticmethod
    def _tfa_required(soup: BeautifulSoup) -> bool:
        """Return whether the page asks for a TFA token."""
        # Also treat a body with id="mfa:otp" as TFA required (for FAU)
        body = soup.find("body")
        return body is not None and body.get("id") == "mfa:otp"
|
||||||
|
|
||||||
|
|
||||||
|
async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup:
    """POST ``data`` to ``url`` and return the response body parsed by ``soupify``."""
    async with session.post(url, data=data) as reply:
        raw_body = await reply.read()
    return soupify(raw_body)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue