mirror of
https://github.com/Garmelon/PFERD.git
synced 2026-04-12 07:25:04 +02:00
Merge afe5819388 into e246053de2
This commit is contained in:
commit
a0b348785a
4 changed files with 139 additions and 4 deletions
|
|
@ -173,6 +173,7 @@ out of the box for the corresponding universities:
|
|||
| Uni Stuttgart | https://ilias3.uni-stuttgart.de | local | Uni_Stuttgart |
|
||||
| Uni Tübingen | https://ovidius.uni-tuebingen.de/ilias3 | shibboleth | |
|
||||
| KIT ILIAS Pilot | https://pilot.ilias.studium.kit.edu | shibboleth | pilot |
|
||||
| FAU StudOn | https://www.studon.fau.de/studon | simple-saml | StudOn |
|
||||
|
||||
If your university isn't listed, try navigating to your instance's login page.
|
||||
Assuming no custom login service is used, the URL will look something like this:
|
||||
|
|
@ -187,8 +188,9 @@ If the values work, feel free to submit a PR and add them to the table above.
|
|||
- `login_type`: How you authenticate. (Required)
|
||||
- `local`: Use `client_id` for authentication.
|
||||
- `shibboleth`: Use shibboleth for authentication.
|
||||
- `simple-saml`: Use SimpleSAML-based authentication.
|
||||
- `client_id`: An ID used for authentication if `login_type` is `local`. Is
|
||||
ignored if `login_type` is `shibboleth`.
|
||||
ignored if `login_type` is `shibboleth` or `simple-saml`.
|
||||
- `target`: The ILIAS element to crawl. (Required)
|
||||
- `desktop`: Crawl your personal desktop / dashboard
|
||||
- `<course id>`: Crawl the course with the given id
|
||||
|
|
|
|||
2
LICENSE
2
LICENSE
|
|
@ -1,6 +1,6 @@
|
|||
Copyright 2019-2024 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
|
||||
TheChristophe, Scriptim, thelukasprobst, Toorero,
|
||||
Mr-Pine, p-fruck, PinieP
|
||||
Mr-Pine, p-fruck, PinieP, NIKL45
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ from .kit_ilias_html import (
|
|||
parse_ilias_forum_export,
|
||||
)
|
||||
from .shibboleth_login import ShibbolethLogin
|
||||
from .simplesaml_login import SimpleSAMLLogin
|
||||
|
||||
TargetType = str | int
|
||||
|
||||
|
|
@ -48,12 +49,14 @@ class IliasWebCrawlerSection(HttpCrawlerSection):
|
|||
|
||||
return base_url
|
||||
|
||||
def login(self) -> Literal["shibboleth"] | LoginTypeLocal:
|
||||
def login(self) -> Literal["shibboleth", "simple-saml"] | LoginTypeLocal:
|
||||
login_type = self.s.get("login_type")
|
||||
if not login_type:
|
||||
self.missing_value("login_type")
|
||||
if login_type == "shibboleth":
|
||||
return "shibboleth"
|
||||
if login_type == "simple-saml":
|
||||
return "simple-saml"
|
||||
if login_type == "local":
|
||||
client_id = self.s.get("client_id")
|
||||
if not client_id:
|
||||
|
|
@ -193,7 +196,14 @@ instance's greatest bottleneck.
|
|||
if isinstance(self._login_type, LoginTypeLocal):
|
||||
self._client_id = self._login_type.client_id
|
||||
else:
|
||||
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
|
||||
# Allow multiple remote login backends
|
||||
if self._login_type == "shibboleth":
|
||||
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
|
||||
elif self._login_type == "simple-saml":
|
||||
self._simplesaml_login = SimpleSAMLLogin(self._base_url, self._auth, self._tfa_auth)
|
||||
else:
|
||||
# Fallback to shibboleth to avoid breaking older configs
|
||||
self._shibboleth_login = ShibbolethLogin(self._base_url, self._auth, self._tfa_auth)
|
||||
|
||||
self._target = section.target()
|
||||
self._link_file_redirect_delay = section.link_redirect_delay()
|
||||
|
|
@ -1045,6 +1055,8 @@ instance's greatest bottleneck.
|
|||
# fill the session with the correct cookies
|
||||
if self._login_type == "shibboleth":
|
||||
await self._shibboleth_login.login(self.session)
|
||||
elif self._login_type == "simple-saml":
|
||||
await self._simplesaml_login.login(self.session)
|
||||
else:
|
||||
params = {
|
||||
"client_id": self._client_id,
|
||||
|
|
|
|||
121
PFERD/crawl/ilias/simplesaml_login.py
Normal file
121
PFERD/crawl/ilias/simplesaml_login.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
from typing import Any, Optional, cast
|
||||
|
||||
import aiohttp
|
||||
import yarl
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
from ...auth import Authenticator, TfaAuthenticator
|
||||
from ...logging import log
|
||||
from ...utils import soupify
|
||||
from ..crawler import CrawlError
|
||||
|
||||
|
||||
class SimpleSAMLLogin:
    """
    Login via a SimpleSAML system.

    The flow starts at the ILIAS ``saml.php`` entry point, follows the redirect
    to the identity provider, posts the user's credentials to the login form
    found there (answering an OTP prompt if one is shown) and finally relays the
    resulting ``RelayState``/``SAMLResponse`` pair to the form target so the
    session ends up with the login cookies.
    """

    def __init__(
        self, ilias_url: str, authenticator: Authenticator, tfa_authenticator: Optional[Authenticator]
    ) -> None:
        """
        Store the ILIAS base URL and the authenticators used during login.

        If ``tfa_authenticator`` is ``None``, a ``TfaAuthenticator`` is created
        lazily the first time an OTP prompt is encountered.
        """
        self._ilias_url = ilias_url
        self._auth = authenticator
        self._tfa_auth = tfa_authenticator

    async def login(self, sess: aiohttp.ClientSession) -> None:
        """
        Perform a SimpleSAML login flow and populate the session cookies.

        Raises a CrawlError if an expected POST form is missing from a page or
        if the identity provider shows an attribute release prompt.
        """

        # Start at the local login entrypoint which may redirect to SimpleSAML
        url = f"{self._ilias_url}/saml.php"
        async with sess.get(url) as response:
            saml_url = response.url
            # If the redirect stayed on the ILIAS host, assume we're already logged in
            if str(saml_url).startswith(self._ilias_url):
                log.explain("ILIAS recognized our SAML token and logged us in in the background, returning")
                return
            soup: BeautifulSoup = soupify(await response.read())

        # The SimpleSAML login page uses a form POST similar to Shibboleth.
        # Keep submitting credentials until the page carries the SAML hand-off fields.
        while not self._login_successful(soup):
            # Locate the form before asking for credentials so that an
            # unexpected page fails early with a readable error.
            form = self._find_post_form(soup)
            url = self._resolve_action(saml_url, cast(str, form["action"]))

            username, password = await self._auth.credentials()
            data = {
                "username": username,
                "password": password,
            }
            if csrf_token_input := form.find("input", {"name": "csrf_token"}):
                data["csrf_token"] = csrf_token_input["value"]  # type: ignore

            soup = await _post(sess, url, data)

            # Detect attribute release prompt
            if soup.find(id="attributeRelease"):
                raise CrawlError(
                    "ILIAS SAML entitlements changed! Please log in once in your browser and review them"
                )

            if self._tfa_required(soup):
                soup = await self._authenticate_tfa(sess, soup, saml_url)

            if not self._login_successful(soup):
                # Drop the rejected credentials so the next iteration asks again
                self._auth.invalidate_credentials()

        # Equivalent: Being redirected via JS automatically
        # (or clicking "Continue" if you have JS disabled)
        relay_state = cast(Tag, soup.find("input", {"name": "RelayState"}))
        saml_response = cast(Tag, soup.find("input", {"name": "SAMLResponse"}))
        url = cast(str, self._find_post_form(soup)["action"])
        data = {  # using the info obtained in the while loop above
            "RelayState": cast(str, relay_state["value"]),
            "SAMLResponse": cast(str, saml_response["value"]),
        }
        async with sess.post(url, data=data):
            # Only the cookies stored in the session matter here; the context
            # manager makes sure the response is released instead of lingering.
            pass

    async def _authenticate_tfa(
        self, session: aiohttp.ClientSession, soup: BeautifulSoup, saml_url: yarl.URL
    ) -> BeautifulSoup:
        """
        Answer the OTP prompt contained in ``soup`` and return the resulting page.

        ``saml_url`` is the URL of the initial identity provider page and is used
        as the base for resolving the form's ``action``.
        """
        # Searching the form here so that this fails before asking for
        # the token rather than after asking.
        form = self._find_post_form(soup)
        url = self._resolve_action(saml_url, cast(str, form["action"]))

        if not self._tfa_auth:
            self._tfa_auth = TfaAuthenticator("ilias-anon-tfa")

        tfa_token = await self._tfa_auth.password()

        data = {  # for www.sso.uni-erlangen.de/simplesaml/module.php/mfa/otp?...
            "otp": tfa_token
        }
        if csrf_token_input := form.find("input", {"name": "csrf_token"}):
            data["csrf_token"] = csrf_token_input["value"]  # type: ignore
        return await _post(session, url, data)

    @staticmethod
    def _find_post_form(soup: BeautifulSoup) -> Tag:
        """
        Return the first ``<form method="post">`` of the page.

        Raises a CrawlError if the page has no such form, which would otherwise
        surface as an opaque ``TypeError`` when subscripting ``None``.
        """
        form = soup.find("form", {"method": "post"})
        if form is None:
            raise CrawlError("Could not find a login form on the SimpleSAML page")
        return cast(Tag, form)

    @staticmethod
    def _resolve_action(base: yarl.URL, action: str) -> str:
        """
        Turn a form ``action`` into an absolute URL, using ``base`` for relative ones.

        FAU uses a full URL here, KIT uses a relative URL. Joining covers both
        and also handles query-only or path-relative actions, which plain
        concatenation with the origin would mangle.
        """
        return str(base.join(yarl.URL(action)))

    @staticmethod
    def _login_successful(soup: BeautifulSoup) -> bool:
        """
        Return True if the page carries both the RelayState and SAMLResponse inputs.
        """
        relay_state = soup.find("input", {"name": "RelayState"})
        saml_response = soup.find("input", {"name": "SAMLResponse"})
        return relay_state is not None and saml_response is not None

    @staticmethod
    def _tfa_required(soup: BeautifulSoup) -> bool:
        """
        Return True if the page is an OTP prompt.
        """
        # Also treat a body with id="mfa:otp" as TFA required (for FAU)
        body = soup.find("body")
        return body is not None and body.get("id") == "mfa:otp"
|
||||
|
||||
|
||||
async def _post(session: aiohttp.ClientSession, url: str, data: Any) -> BeautifulSoup:
    """
    POST ``data`` to ``url`` using ``session`` and return the response body parsed by ``soupify``.
    """
    async with session.post(url, data=data) as reply:
        payload = await reply.read()
    return soupify(payload)
|
||||
Loading…
Add table
Add a link
Reference in a new issue