Add redirects command
This commit is contained in:
parent
e90864a097
commit
76efd6d728
3 changed files with 110 additions and 0 deletions
|
|
@ -1,4 +1,5 @@
|
|||
pub mod export;
|
||||
pub mod ingest;
|
||||
pub mod path;
|
||||
pub mod redirects;
|
||||
pub mod show;
|
||||
|
|
|
|||
107
brood/src/commands/redirects.rs
Normal file
107
brood/src/commands/redirects.rs
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
use std::{cmp::Reverse, collections::HashSet, io};
|
||||
|
||||
use thousands::Separable;
|
||||
|
||||
use crate::{data::Data, graph::NodeIdx, util};
|
||||
|
||||
fn find_redirects(data: &Data) -> Vec<(NodeIdx, NodeIdx, usize)> {
|
||||
let mut redirects = Vec::<(NodeIdx, NodeIdx, usize)>::new();
|
||||
|
||||
for node in data.graph.nodes() {
|
||||
if !data.pages[node.usize()].redirect {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut seen = HashSet::new();
|
||||
|
||||
let mut curr = node;
|
||||
seen.insert(node);
|
||||
|
||||
while let Some(next) = data.redirect_target(curr) {
|
||||
if seen.contains(&next) {
|
||||
println!(" Redirect loop: {}", data.pages[node.usize()].title);
|
||||
break;
|
||||
}
|
||||
|
||||
curr = next;
|
||||
seen.insert(next);
|
||||
}
|
||||
|
||||
redirects.push((node, curr, seen.len() - 1));
|
||||
}
|
||||
|
||||
redirects
|
||||
}
|
||||
|
||||
fn follow_redirect(data: &Data, start: NodeIdx) -> Vec<NodeIdx> {
|
||||
let mut seen = HashSet::new();
|
||||
let mut nodes = Vec::new();
|
||||
|
||||
let mut curr = start;
|
||||
seen.insert(curr);
|
||||
nodes.push(curr);
|
||||
|
||||
while let Some(next) = data.redirect_target(curr) {
|
||||
if seen.contains(&next) {
|
||||
break;
|
||||
}
|
||||
|
||||
curr = next;
|
||||
seen.insert(curr);
|
||||
nodes.push(curr);
|
||||
}
|
||||
|
||||
nodes
|
||||
}
|
||||
|
||||
/// Show interesting redirect stats.
|
||||
#[derive(Debug, clap::Parser)]
|
||||
pub struct Cmd {
|
||||
#[arg(long, short)]
|
||||
long: bool,
|
||||
}
|
||||
|
||||
impl Cmd {
|
||||
pub fn run(self, data: Data) -> io::Result<()> {
|
||||
println!(">> Resolving redirects");
|
||||
let redirects = find_redirects(&data);
|
||||
|
||||
println!();
|
||||
println!(
|
||||
"There is a total of {} redirects.",
|
||||
redirects.len().separate_with_underscores()
|
||||
);
|
||||
|
||||
let mut long = redirects
|
||||
.iter()
|
||||
.filter(|(_, _, l)| *l > 1)
|
||||
.collect::<Vec<_>>();
|
||||
long.sort_by_key(|(_, _, l)| Reverse(l));
|
||||
|
||||
println!(
|
||||
"{} redirects take more than one step to reach an article.",
|
||||
long.len().separate_with_underscores()
|
||||
);
|
||||
|
||||
println!(
|
||||
"The longest redirect chain takes {} steps.",
|
||||
long.iter().map(|(_, _, l)| l).max().copied().unwrap_or(0),
|
||||
);
|
||||
|
||||
println!("Though these redirect chains are usually swiftly fixed by bots.");
|
||||
|
||||
if self.long {
|
||||
println!();
|
||||
println!("Redirect chains with length > 1:");
|
||||
|
||||
for (start, _, _) in long {
|
||||
println!();
|
||||
for step in follow_redirect(&data, *start) {
|
||||
println!("{}", util::fmt_page(&data.pages[step.usize()]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -15,6 +15,7 @@ enum Command {
|
|||
Export(commands::export::Cmd),
|
||||
Show(commands::show::Cmd),
|
||||
Path(commands::path::Cmd),
|
||||
Redirects(commands::redirects::Cmd),
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
|
|
@ -73,5 +74,6 @@ fn main() -> io::Result<()> {
|
|||
Command::Export(cmd) => cmd.run(data),
|
||||
Command::Show(cmd) => cmd.run(data),
|
||||
Command::Path(cmd) => cmd.run(data),
|
||||
Command::Redirects(cmd) => cmd.run(data),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue