From ceb987bbbc8a6c47d6126843fc4c218e70d80540 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 31 Dec 2024 14:48:59 +0100 Subject: [PATCH] Add show command --- brood/src/commands.rs | 1 + brood/src/commands/show.rs | 79 ++++++++++++++++++++++++++++++++++++++ brood/src/data.rs | 8 ++++ brood/src/main.rs | 2 + brood/src/util.rs | 12 ++---- 5 files changed, 93 insertions(+), 9 deletions(-) create mode 100644 brood/src/commands/show.rs diff --git a/brood/src/commands.rs b/brood/src/commands.rs index 2e77470..d72d397 100644 --- a/brood/src/commands.rs +++ b/brood/src/commands.rs @@ -1,2 +1,3 @@ pub mod ingest; pub mod path; +pub mod show; diff --git a/brood/src/commands/show.rs b/brood/src/commands/show.rs new file mode 100644 index 0000000..2e14aed --- /dev/null +++ b/brood/src/commands/show.rs @@ -0,0 +1,79 @@ +use std::{io, path::Path}; + +use thousands::Separable; + +use crate::{ + data::Data, + util::{self, TitleNormalizer}, +}; + +/// Show info about a specific article. +#[derive(Debug, clap::Parser)] +pub struct Cmd { + title: String, +} + +impl Cmd { + pub fn run(self, data: &Path) -> io::Result<()> { + let normalizer = TitleNormalizer::new(); + + println!(">> Import"); + let data = Data::read_from_file(data)?; + + println!(">> Locate article"); + let mut node = util::locate_title(&normalizer, &data, &self.title); + + loop { + let page = &data.pages[node.usize()]; + + const W_LABEL: usize = 12; + const W_NUM: usize = 11; + + println!(); + + println!("{:>W_LABEL$}: {}", "Title", page.title); + + println!( + "{:>W_LABEL$}: {}", + "Title (norm)", + normalizer.normalize(&page.title) + ); + + println!("{:>W_LABEL$}: {}", "Redirect", page.redirect); + + println!("{:>W_LABEL$}: {:>W_NUM$}", "ID", page.id); + + println!( + "{:>W_LABEL$}: {:>W_NUM$}", + "Length", + page.length.separate_with_underscores() + ); + + println!( + "{:>W_LABEL$}: {:>W_NUM$}", + "Links (out)", + data.graph + .edge_range(node) + .len() + .separate_with_underscores() + ); + + println!( 
+ "{:>W_LABEL$}: {:>W_NUM$}", + "Links (in)", + data.graph + .edges() + .filter(|(_, target)| *target == node) + .count() + .separate_with_underscores() + ); + + node = match data.redirect_target(node) { + Some(target) => target, + None => break, + }; + } + + Ok(()) + } +} diff --git a/brood/src/data.rs b/brood/src/data.rs index 20c95a6..2c3213c 100644 --- a/brood/src/data.rs +++ b/brood/src/data.rs @@ -193,4 +193,12 @@ impl Data { let mut file = BufReader::new(File::open(path)?); Self::read(&mut file) } + + pub fn redirect_target(&self, node: NodeIdx) -> Option<NodeIdx> { + if !self.pages[node.usize()].redirect { + return None; + } + + self.graph.edge_slice(node).first().copied() + } } diff --git a/brood/src/main.rs b/brood/src/main.rs index c31b1f4..db547ce 100644 --- a/brood/src/main.rs +++ b/brood/src/main.rs @@ -11,6 +11,7 @@ use clap::Parser; #[derive(Debug, Parser)] enum Command { Ingest(commands::ingest::Cmd), + Show(commands::show::Cmd), Path(commands::path::Cmd), } @@ -25,6 +26,7 @@ fn main() -> io::Result<()> { let args = Args::parse(); match args.command { Command::Ingest(cmd) => cmd.run(&args.datafile), + Command::Show(cmd) => cmd.run(&args.datafile), Command::Path(cmd) => cmd.run(&args.datafile), } } diff --git a/brood/src/util.rs b/brood/src/util.rs index d908a42..0f76c67 100644 --- a/brood/src/util.rs +++ b/brood/src/util.rs @@ -161,16 +161,10 @@ pub fn locate_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> N } pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx { - loop { - if data.pages[page.usize()].redirect { - if let Some(target) = data.graph.edge_slice(page).first() { - page = *target; - continue; - } - } - - return page; + while let Some(target) = data.redirect_target(page) { + page = target; } + page } pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx {