Add show command

This commit is contained in:
Joscha 2024-12-31 14:48:59 +01:00
parent aa4187fcd8
commit ceb987bbbc
5 changed files with 93 additions and 9 deletions

View file

@ -1,2 +1,3 @@
pub mod ingest; pub mod ingest;
pub mod path; pub mod path;
pub mod show;

View file

@ -0,0 +1,79 @@
use std::{io, path::Path};
use thousands::Separable;
use crate::{
data::Data,
util::{self, TitleNormalizer},
};
// NOTE(review): clap's derive turns the `///` doc comment below into this
// subcommand's help text, so rewording it changes the CLI's `--help` output.
/// Show info about a specific article.
#[derive(Debug, clap::Parser)]
pub struct Cmd {
// Title of the article to look up; `Cmd::run` passes it to `util::locate_title`.
// There is no `#[arg(...)]` attribute, so this is presumably a required
// positional argument under clap's derive defaults — confirm against clap docs.
title: String,
}
impl Cmd {
    /// Prints details about the article named by `self.title` and about every
    /// page reached by following its redirects.
    ///
    /// For each page the output lists the title, the normalized title, the
    /// redirect flag, the page ID, the page length, and the number of outgoing
    /// and incoming links.
    ///
    /// `data` is the path of the data file that is loaded via
    /// [`Data::read_from_file`]. How a title that cannot be found is handled is
    /// up to `util::locate_title`.
    ///
    /// # Errors
    ///
    /// Returns the I/O error produced by [`Data::read_from_file`] if the data
    /// file cannot be read.
    pub fn run(self, data: &Path) -> io::Result<()> {
        // Column widths for the right-aligned labels and numeric values. They do
        // not depend on the page, so they live outside the loop.
        const W_LABEL: usize = 12;
        const W_NUM: usize = 11;

        let normalizer = TitleNormalizer::new();

        println!(">> Import");
        let data = Data::read_from_file(data)?;

        println!(">> Locate article");
        let mut node = util::locate_title(&normalizer, &data, &self.title);

        // Page indices that have already been printed. Redirects may form a
        // cycle (A -> B -> A, or a page redirecting to itself); without this
        // guard the loop below would print the same pages forever.
        let mut visited = std::collections::HashSet::new();

        // `HashSet::insert` returns `false` when the value was already present,
        // which ends the walk as soon as a page would be shown a second time.
        while visited.insert(node.usize()) {
            let page = &data.pages[node.usize()];

            println!();
            println!("{:>W_LABEL$}: {}", "Title", page.title);
            println!(
                "{:>W_LABEL$}: {}",
                "Title (norm)",
                normalizer.normalize(&page.title)
            );
            println!("{:>W_LABEL$}: {}", "Redirect", page.redirect);
            println!("{:>W_LABEL$}: {:>W_NUM$}", "ID", page.id);
            println!(
                "{:>W_LABEL$}: {:>W_NUM$}",
                "Length",
                page.length.separate_with_underscores()
            );
            println!(
                "{:>W_LABEL$}: {:>W_NUM$}",
                "Links (out)",
                data.graph
                    .edge_range(node)
                    .len()
                    .separate_with_underscores()
            );
            // Incoming links are counted by scanning every edge in the graph
            // for ones that point at this page.
            println!(
                "{:>W_LABEL$}: {:>W_NUM$}",
                "Links (in)",
                data.graph
                    .edges()
                    .filter(|(_, target)| *target == node)
                    .count()
                    .separate_with_underscores()
            );

            // Continue with the redirect target, or stop at a regular page.
            node = match data.redirect_target(node) {
                Some(target) => target,
                None => break,
            };
        }

        Ok(())
    }
}

View file

@ -193,4 +193,12 @@ impl Data {
let mut file = BufReader::new(File::open(path)?); let mut file = BufReader::new(File::open(path)?);
Self::read(&mut file) Self::read(&mut file)
} }
/// Returns the page that `node` redirects to, if any.
///
/// The target is the first outgoing edge of `node`. `None` is returned when
/// the page is not flagged as a redirect, or when it is flagged but has no
/// outgoing edges.
pub fn redirect_target(&self, node: NodeIdx) -> Option<NodeIdx> {
    let page = &self.pages[node.usize()];
    if page.redirect {
        // Only redirect pages have their edges inspected.
        self.graph.edge_slice(node).first().copied()
    } else {
        None
    }
}
} }

View file

@ -11,6 +11,7 @@ use clap::Parser;
#[derive(Debug, Parser)] #[derive(Debug, Parser)]
enum Command { enum Command {
Ingest(commands::ingest::Cmd), Ingest(commands::ingest::Cmd),
Show(commands::show::Cmd),
Path(commands::path::Cmd), Path(commands::path::Cmd),
} }
@ -25,6 +26,7 @@ fn main() -> io::Result<()> {
let args = Args::parse(); let args = Args::parse();
match args.command { match args.command {
Command::Ingest(cmd) => cmd.run(&args.datafile), Command::Ingest(cmd) => cmd.run(&args.datafile),
Command::Show(cmd) => cmd.run(&args.datafile),
Command::Path(cmd) => cmd.run(&args.datafile), Command::Path(cmd) => cmd.run(&args.datafile),
} }
} }

View file

@ -161,16 +161,10 @@ pub fn locate_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> N
} }
pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx { pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx {
loop { while let Some(target) = data.redirect_target(page) {
if data.pages[page.usize()].redirect { page = target;
if let Some(target) = data.graph.edge_slice(page).first() {
page = *target;
continue;
}
}
return page;
} }
page
} }
pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx { pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {