Detect redirect cycles
This commit is contained in:
parent
1f20e0519a
commit
04482f9f2f
1 changed files with 19 additions and 5 deletions
|
|
@ -1,4 +1,4 @@
|
||||||
use std::{fmt, time::Instant};
|
use std::{collections::HashSet, fmt, time::Instant};
|
||||||
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use thousands::Separable;
|
use thousands::Separable;
|
||||||
|
|
@ -160,11 +160,25 @@ pub fn locate_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> N
|
||||||
.expect("invalid title")
|
.expect("invalid title")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx {
|
pub fn resolve_redirects(data: &Data, node: NodeIdx) -> NodeIdx {
|
||||||
while let Some(target) = data.redirect_target(page) {
|
let mut curr = node;
|
||||||
page = target;
|
let mut seen = HashSet::new();
|
||||||
|
|
||||||
|
seen.insert(curr);
|
||||||
|
while let Some(target) = data.redirect_target(curr) {
|
||||||
|
if seen.contains(&target) {
|
||||||
|
println!(
|
||||||
|
" Redirect cycle detected: {:?}",
|
||||||
|
data.pages[node.usize()].title
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
seen.insert(target);
|
||||||
|
curr = target;
|
||||||
}
|
}
|
||||||
page
|
|
||||||
|
curr
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {
|
pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue