Detect redirect cycles

This commit is contained in:
Joscha 2024-12-31 16:40:55 +01:00
parent 1f20e0519a
commit 04482f9f2f

View file

@@ -1,4 +1,4 @@
use std::{fmt, time::Instant}; use std::{collections::HashSet, fmt, time::Instant};
use regex::Regex; use regex::Regex;
use thousands::Separable; use thousands::Separable;
@@ -160,11 +160,25 @@ pub fn locate_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> N
.expect("invalid title") .expect("invalid title")
} }
pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx { pub fn resolve_redirects(data: &Data, node: NodeIdx) -> NodeIdx {
while let Some(target) = data.redirect_target(page) { let mut curr = node;
page = target; let mut seen = HashSet::new();
seen.insert(curr);
while let Some(target) = data.redirect_target(curr) {
if seen.contains(&target) {
println!(
" Redirect cycle deteted: {:?}",
data.pages[node.usize()].title
);
break;
} }
page
seen.insert(target);
curr = target;
}
curr
} }
pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx { pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {