From 49665f74ce762ea475824f0423f9716e109160e4 Mon Sep 17 00:00:00 2001
From: Joscha
Date: Mon, 30 Dec 2024 13:12:14 +0100
Subject: [PATCH] List links to and from an article

---
 brood/src/commands.rs            |   1 +
 brood/src/commands/list_links.rs | 103 ++++++++++++++++++++++++++++++++
 brood/src/main.rs                |   6 +++
 3 files changed, 110 insertions(+)
 create mode 100644 brood/src/commands/list_links.rs

diff --git a/brood/src/commands.rs b/brood/src/commands.rs
index ffff9d3..6da3050 100644
--- a/brood/src/commands.rs
+++ b/brood/src/commands.rs
@@ -1,4 +1,5 @@
 pub mod ingest;
+pub mod list_links;
 pub mod list_pages;
 pub mod longest_shortest_path;
 pub mod path;
diff --git a/brood/src/commands/list_links.rs b/brood/src/commands/list_links.rs
new file mode 100644
index 0000000..37c9972
--- /dev/null
+++ b/brood/src/commands/list_links.rs
@@ -0,0 +1,103 @@
+use std::{
+    collections::HashSet,
+    fs::File,
+    io::{self, BufReader},
+    path::Path,
+};
+
+use crate::{
+    data::{
+        adjacency_list::AdjacencyList,
+        info::{LinkInfo, PageInfo},
+        store,
+    },
+    util,
+};
+
+/// Returns the indices of all pages that the page at `idx` links to.
+fn links_from(data: &AdjacencyList<PageInfo, LinkInfo>, idx: u32) -> HashSet<u32> {
+    data.links(idx).map(|(_, ld)| ld.to).collect()
+}
+
+/// Returns the indices of all pages that have at least one link to the page
+/// at `idx`.
+///
+/// This scans the links of every page in `data`.
+fn links_to(data: &AdjacencyList<PageInfo, LinkInfo>, idx: u32) -> HashSet<u32> {
+    let mut links = HashSet::<u32>::new();
+    for (pi, _) in data.pages() {
+        // `any` stops at the first matching link; one match is enough to
+        // record `pi` as a source page.
+        if data.links(pi).any(|(_, ld)| ld.to == idx) {
+            links.insert(pi);
+        }
+    }
+    links
+}
+
+/// Prints a `>> {name} ({count}):` header followed by the titles of `links`
+/// in sorted order, one per line.
+///
+/// Pages whose `redirect` flag is set are prefixed with `v`, all others with `-`.
+fn print_links(data: &AdjacencyList<PageInfo, LinkInfo>, name: &str, links: &HashSet<u32>) {
+    let mut links = links
+        .iter()
+        .map(|pi| {
+            let page = data.page(*pi);
+            (&page.data.title as &str, page.data.redirect)
+        })
+        .collect::<Vec<_>>();
+
+    // A `HashSet` iterates in arbitrary order, so sort for stable output.
+    links.sort();
+
+    println!(">> {name} ({}):", links.len());
+    for (title, redirect) in links {
+        if redirect {
+            println!("v {title}");
+        } else {
+            println!("- {title}");
+        }
+    }
+}
+
+/// Loads the adjacency list from `datafile`, looks up `page` by title (passing
+/// the result through `util::resolve_redirects`) and prints its outgoing links,
+/// its incoming links, the links going both ways ("twins"), and both lists
+/// again with the twins removed.
+///
+/// # Errors
+///
+/// Returns an error if `datafile` cannot be opened or if reading the adjacency
+/// list from it fails.
+pub fn run(datafile: &Path, page: &str) -> io::Result<()> {
+    println!(">> Import");
+    let mut databuf = BufReader::new(File::open(datafile)?);
+    let data = store::read_adjacency_list(&mut databuf)?;
+
+    println!(">> Locate page");
+    let idx = util::resolve_redirects(&data, util::find_index_of_title(&data.pages, page));
+    println!("Page: {:?}", data.page(idx).data.title);
+
+    println!(">> Find links");
+    let from = links_from(&data, idx);
+    let to = links_to(&data, idx);
+    let twins = from.intersection(&to).copied().collect::<HashSet<_>>();
+    let twinless_from = from.difference(&twins).copied().collect::<HashSet<_>>();
+    let twinless_to = to.difference(&twins).copied().collect::<HashSet<_>>();
+
+    println!();
+    print_links(&data, "From", &from);
+
+    println!();
+    print_links(&data, "To", &to);
+
+    println!();
+    print_links(&data, "Twins", &twins);
+
+    println!();
+    print_links(&data, "From without twins", &twinless_from);
+
+    println!();
+    print_links(&data, "To without twins", &twinless_to);
+
+    Ok(())
+}
diff --git a/brood/src/main.rs b/brood/src/main.rs
index e4b4074..57d1b81 100644
--- a/brood/src/main.rs
+++ b/brood/src/main.rs
@@ -44,6 +44,11 @@ enum Command {
     },
     /// Print all page titles.
     ListPages,
+    /// Print all links.
+    ListLinks {
+        /// The page to inspect.
+        page: String,
+    },
 }
 
 #[derive(Debug, Parser)]
@@ -76,5 +81,6 @@ fn main() -> io::Result<()> {
             commands::philosophy_game::run(&args.datafile, subcmd)
         }
         Command::ListPages => commands::list_pages::run(&args.datafile),
+        Command::ListLinks { page } => commands::list_links::run(&args.datafile, &page),
     }
 }