diff --git a/brood/src/commands/list_links.rs b/brood/src/commands/list_links.rs deleted file mode 100644 index 37c9972..0000000 --- a/brood/src/commands/list_links.rs +++ /dev/null @@ -1,87 +0,0 @@ -use std::{ - collections::HashSet, - fs::File, - io::{self, BufReader}, - path::Path, -}; - -use crate::{ - data::{ - adjacency_list::AdjacencyList, - info::{LinkInfo, PageInfo}, - store, - }, - util, -}; - -fn links_from(data: &AdjacencyList, idx: u32) -> HashSet { - data.links(idx).map(|(_, ld)| ld.to).collect() -} - -fn links_to(data: &AdjacencyList, idx: u32) -> HashSet { - let mut links = HashSet::::new(); - for (pi, _) in data.pages() { - for (_, ld) in data.links(pi) { - if ld.to == idx { - links.insert(pi); - continue; - } - } - } - links -} - -fn print_links(data: &AdjacencyList, name: &str, links: &HashSet) { - let mut links = links - .iter() - .map(|pi| { - let page = data.page(*pi); - (&page.data.title as &str, page.data.redirect) - }) - .collect::>(); - - links.sort(); - - println!(">> {name} ({}):", links.len()); - for (title, redirect) in links { - if redirect { - println!("v {title}"); - } else { - println!("- {title}"); - } - } -} - -pub fn run(datafile: &Path, page: &str) -> io::Result<()> { - println!(">> Import"); - let mut databuf = BufReader::new(File::open(datafile)?); - let data = store::read_adjacency_list(&mut databuf)?; - - println!(">> Locate page"); - let idx = util::resolve_redirects(&data, util::find_index_of_title(&data.pages, page)); - println!("Page: {:?}", data.page(idx).data.title); - - println!(">> Find links"); - let from = links_from(&data, idx); - let to = links_to(&data, idx); - let twins = from.intersection(&to).copied().collect::>(); - let twinless_from = from.difference(&twins).copied().collect::>(); - let twinless_to = to.difference(&twins).copied().collect::>(); - - println!(); - print_links(&data, "From", &from); - - println!(); - print_links(&data, "To", &to); - - println!(); - print_links(&data, "Twins", &twins); - - println!(); - print_links(&data, "From without twins", &twinless_from); - - println!(); - print_links(&data, "To without twins", &twinless_to); - - Ok(()) -} diff --git a/brood/src/commands/path.rs b/brood/src/commands/path.rs index 882203c..ebea2a6 100644 --- a/brood/src/commands/path.rs +++ b/brood/src/commands/path.rs @@ -48,12 +48,7 @@ impl Cmd { println!(); println!("Path found (cost {cost}, length {}):", path.len()); for page in path { - let info = &data.pages[page.usize()]; - if info.redirect { - println!("v {:?}", info.title); - } else { - println!("- {:?}", info.title); - } + println!("{}", util::fmt_page(&data.pages[page.usize()])); } Ok(()) diff --git a/brood/src/commands/show.rs b/brood/src/commands/show.rs index 2e14aed..894d11d 100644 --- a/brood/src/commands/show.rs +++ b/brood/src/commands/show.rs @@ -1,4 +1,4 @@ -use std::{io, path::Path}; +use std::{collections::HashSet, io, path::Path}; use thousands::Separable; @@ -11,6 +11,10 @@ use crate::{ #[derive(Debug, clap::Parser)] pub struct Cmd { title: String, + + /// Print links in more detail. + #[arg(long, short)] + links: bool, } impl Cmd { @@ -49,25 +53,96 @@ impl Cmd { page.length.separate_with_underscores() ); + let outlinks = data.graph.edge_slice(node).to_vec(); + let inlinks = data + .graph + .edges() + .filter(|(_, target)| *target == node) + .map(|(source, _)| source) + .collect::>(); + + let outlinks_set = outlinks.iter().copied().collect::>(); + let inlinks_set = inlinks.iter().copied().collect::>(); + let twins_set = outlinks_set + .intersection(&inlinks_set) + .copied() + .collect::>(); + println!( "{:>W_LABEL$}: {:>W_NUM$}", "Links (out)", - data.graph - .edge_range(node) - .len() - .separate_with_underscores() + outlinks.len().separate_with_underscores() + ); + + println!( + "{:>W_LABEL$}: {:>W_NUM$}", + "unique", + outlinks_set.len().separate_with_underscores() ); println!( "{:>W_LABEL$}: {:>W_NUM$}", "Links (in)", - data.graph - .edges() - .filter(|(_, target)| *target == node) - .count() - .separate_with_underscores() + inlinks.len().separate_with_underscores() ); + println!( + "{:>W_LABEL$}: {:>W_NUM$}", + "unique", + inlinks_set.len().separate_with_underscores() + ); + + println!( + "{:>W_LABEL$}: {:>W_NUM$}", + "Twins", + twins_set.len().separate_with_underscores() + ); + + if self.links { + let mut twin_pages = twins_set + .iter() + .map(|n| &data.pages[n.usize()]) + .collect::>(); + + let mut outlink_only_pages = outlinks_set + .difference(&twins_set) + .map(|n| &data.pages[n.usize()]) + .collect::>(); + + let mut inlink_only_pages = inlinks_set + .difference(&twins_set) + .map(|n| &data.pages[n.usize()]) + .collect::>(); + + twin_pages.sort_by_key(|p| &p.title); + outlink_only_pages.sort_by_key(|p| &p.title); + inlink_only_pages.sort_by_key(|p| &p.title); + + println!(); + println!("Twins ({}):", twin_pages.len().separate_with_underscores()); + for page in twin_pages { + println!("{}", util::fmt_page(page)); + } + + println!(); + println!( + "Only outlinks ({}):", + outlink_only_pages.len().separate_with_underscores() + ); + for page in outlink_only_pages { + println!("{}", util::fmt_page(page)); + } + + println!(); + println!( + "Only inlinks ({}):", + inlink_only_pages.len().separate_with_underscores() + ); + for page in inlink_only_pages { + println!("{}", util::fmt_page(page)); + } + } + node = match data.redirect_target(node) { Some(target) => target, None => break, diff --git a/brood/src/util.rs b/brood/src/util.rs index 0f76c67..2a8f1d0 100644 --- a/brood/src/util.rs +++ b/brood/src/util.rs @@ -170,3 +170,11 @@ pub fn resolve_redirects(data: &Data, mut page: NodeIdx) -> NodeIdx { pub fn resolve_title(normalizer: &TitleNormalizer, data: &Data, title: &str) -> NodeIdx { resolve_redirects(data, locate_title(normalizer, data, title)) } + +pub fn fmt_page(page: &Page) -> String { + if page.redirect { + format!("v {}", page.title) + } else { + format!("- {}", page.title) + } +}