Add a `PathPetgraph` command that searches for a path with petgraph.

Notes on this copy of the patch:
- Generic parameters that had been stripped from the text
  (`Graph<PageInfo, LinkInfo>`, `<R: Read>`) are written out again.
- Runs of spaces inside string literals are reproduced as found.
- The context lines of the second brood/src/data/store.rs hunk could not be
  recovered, so that hunk is zero-context (use `git apply --unidiff-zero`
  or `patch`).
- `index` lines are omitted for the two files whose added code was revised:
  unknown titles and malformed data now yield `io::Error`s instead of panics,
  and redirect resolution is bounded so that a redirect cycle terminates.

diff --git a/brood/Cargo.lock b/brood/Cargo.lock
index 813574a..ff57248 100644
--- a/brood/Cargo.lock
+++ b/brood/Cargo.lock
@@ -56,6 +56,7 @@ name = "brood"
 version = "0.0.0"
 dependencies = [
  "clap",
+ "petgraph",
  "rustc-hash",
  "serde",
  "serde_json",
@@ -107,12 +108,40 @@ version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
 
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
+[[package]]
+name = "fixedbitset"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
+
+[[package]]
+name = "hashbrown"
+version = "0.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
+
 [[package]]
 name = "heck"
 version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
+[[package]]
+name = "indexmap"
+version = "2.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.1"
@@ -131,6 +160,16 @@ version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
+[[package]]
+name = "petgraph"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
+dependencies = [
+ "fixedbitset",
+ "indexmap",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.92"
diff --git a/brood/Cargo.toml b/brood/Cargo.toml
index f53334d..97393e9 100644
--- a/brood/Cargo.toml
+++ b/brood/Cargo.toml
@@ -5,6 +5,7 @@ edition = "2021"
 
 [dependencies]
 clap = { version = "4.5.23", features = ["derive", "deprecated"] }
+petgraph = "0.6.5"
 rustc-hash = "2.1.0"
 serde = { version = "1.0.217", features = ["derive"] }
 serde_json = "1.0.134"
diff --git a/brood/src/commands.rs b/brood/src/commands.rs
index 6da3050..a5b0156 100644
--- a/brood/src/commands.rs
+++ b/brood/src/commands.rs
@@ -3,5 +3,6 @@ pub mod list_links;
 pub mod list_pages;
 pub mod longest_shortest_path;
 pub mod path;
+pub mod path_petgraph;
 pub mod philosophy_game;
 pub mod reexport;
diff --git a/brood/src/commands/path_petgraph.rs b/brood/src/commands/path_petgraph.rs
new file mode 100644
--- /dev/null
+++ b/brood/src/commands/path_petgraph.rs
@@ -0,0 +1,120 @@
+use std::{
+    fs::File,
+    io::{self, BufReader},
+    path::Path,
+};
+
+use petgraph::{
+    algo,
+    graph::NodeIndex,
+    visit::{EdgeRef, IntoNodeReferences},
+    Graph,
+};
+
+use crate::{
+    data::{
+        info::{LinkInfo, PageInfo},
+        store,
+    },
+    util::{self, normalize_link},
+};
+
+/// Returns the first node whose normalized title equals the normalized `title`,
+/// or `None` if no node matches.
+fn lookup_title(graph: &Graph<PageInfo, LinkInfo>, title: &str) -> Option<NodeIndex> {
+    let title = util::normalize_link(title);
+    graph
+        .node_references()
+        .find(|(_, nw)| normalize_link(&nw.title) == title)
+        .map(|(ni, _)| ni)
+}
+
+/// Returns the index of the page whose normalized title matches `title`.
+///
+/// # Panics
+///
+/// Panics with "invalid title" if no page matches.
+pub fn find_index_of_title(graph: &Graph<PageInfo, LinkInfo>, title: &str) -> NodeIndex {
+    lookup_title(graph, title).expect("invalid title")
+}
+
+/// Follows redirects starting at `page` and returns the page they lead to.
+///
+/// Stops at the first page that is not a redirect, or at a redirect without
+/// outgoing links. Only the first outgoing link of a redirect is followed.
+pub fn resolve_redirects(graph: &Graph<PageInfo, LinkInfo>, mut page: NodeIndex) -> NodeIndex {
+    // A chain longer than the node count must revisit a page, so capping the
+    // number of hops keeps a redirect cycle from looping forever.
+    for _ in 0..graph.node_count() {
+        if !graph[page].redirect {
+            break;
+        }
+        let Some(link) = graph.edges(page).next() else {
+            break;
+        };
+        page = link.target();
+    }
+    page
+}
+
+/// Resolves a user-supplied title to a node, following redirects.
+///
+/// # Errors
+///
+/// Returns an [`io::ErrorKind::InvalidInput`] error if no page matches `title`.
+fn locate(graph: &Graph<PageInfo, LinkInfo>, title: &str) -> io::Result<NodeIndex> {
+    let index = lookup_title(graph, title).ok_or_else(|| {
+        io::Error::new(
+            io::ErrorKind::InvalidInput,
+            format!("no page with title {title:?}"),
+        )
+    })?;
+    Ok(resolve_redirects(graph, index))
+}
+
+/// Prints a cheapest path between the pages titled `from` and `to`.
+///
+/// The graph is read from `datafile`. Both titles are resolved through
+/// redirects first. Following a link out of a redirect page costs 0 and any
+/// other link costs 1, so redirects on the path do not add to the cost.
+///
+/// # Errors
+///
+/// Returns an error if `datafile` cannot be opened or read, or if either
+/// title does not match a page.
+pub fn path(datafile: &Path, from: &str, to: &str) -> io::Result<()> {
+    println!(">> Import");
+    let mut databuf = BufReader::new(File::open(datafile)?);
+    let graph = store::read_petgraph(&mut databuf)?;
+
+    println!(">> Locate from and to");
+    let start = locate(&graph, from)?;
+    let goal = locate(&graph, to)?;
+    println!("From: {:?}", graph[start].title);
+    println!("To: {:?}", graph[goal].title);
+
+    println!(">> Find path");
+    // The heuristic is constantly 0, so A* behaves like Dijkstra's algorithm.
+    let Some((cost, path)) = algo::astar(
+        &graph,
+        start,
+        |n| n == goal,
+        |e| u32::from(!graph[e.source()].redirect),
+        |_| 0,
+    ) else {
+        println!("No path found");
+        return Ok(());
+    };
+
+    println!("Path found (cost {cost}, length {}):", path.len());
+    for page in path {
+        let page = &graph[page];
+        if page.redirect {
+            println!(" v {:?}", page.title);
+        } else {
+            println!(" - {:?}", page.title);
+        }
+    }
+
+    Ok(())
+}
diff --git a/brood/src/data/store.rs b/brood/src/data/store.rs
--- a/brood/src/data/store.rs
+++ b/brood/src/data/store.rs
@@ -1,5 +1,7 @@
 use std::io::{self, Read, Write};
 
+use petgraph::{graph::NodeIndex, Directed, Graph};
+
 use super::{
     adjacency_list::{AdjacencyList, Link, Page},
     info::{LinkInfo, PageInfo},
@@ -134,0 +137,51 @@
+
+/// Reads a page/link graph from `from` into a petgraph [`Graph`].
+///
+/// The stream holds the page count and the link count, then every page,
+/// then every link. `page.start` is used as the index of a page's first
+/// link, which is how links are attributed to their source page.
+///
+/// # Errors
+///
+/// Returns any read error, and [`io::ErrorKind::InvalidData`] if a link
+/// precedes the first page's links or points at a page that does not exist.
+pub fn read_petgraph<R: Read>(from: &mut R) -> io::Result<Graph<PageInfo, LinkInfo>> {
+    let n_pages = read_u32(from)?;
+    let n_links = read_u32(from)?;
+
+    let mut graph = Graph::<_, _, Directed, _>::with_capacity(n_pages as usize, n_links as usize);
+    let mut page_starts = Vec::with_capacity(n_pages as usize);
+
+    for _ in 0..n_pages {
+        let page = read_page(from)?;
+        page_starts.push(page.start);
+        graph.add_node(page.data);
+    }
+
+    let mut ni = 0;
+    for ei in 0..n_links {
+        // Advance to the first page that starts after link `ei`; the page
+        // before it is the source of the link.
+        while ei >= page_starts.get(ni).copied().unwrap_or(u32::MAX) {
+            ni += 1;
+        }
+        // `ni` is still 0 when no page starts at or before `ei`; report that
+        // instead of underflowing.
+        ni = ni.checked_sub(1).ok_or_else(|| {
+            io::Error::new(io::ErrorKind::InvalidData, "link without a source page")
+        })?;
+
+        let link = read_link(from)?;
+        let target = link.to as usize;
+        // `add_edge` panics on a missing node, so reject bad targets first.
+        if target >= graph.node_count() {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                "link points at a page that does not exist",
+            ));
+        }
+        graph.add_edge(NodeIndex::new(ni), NodeIndex::new(target), link.data);
+    }
+
+    Ok(graph)
+}
diff --git a/brood/src/main.rs b/brood/src/main.rs
index 57d1b81..774f6b0 100644
--- a/brood/src/main.rs
+++ b/brood/src/main.rs
@@ -35,6 +35,14 @@ enum Command {
         #[arg(short, long)]
         flip: bool,
     },
+    /// Find a path from one article to another.
+    PathPetgraph {
+        from: String,
+        to: String,
+        /// Flip start and end article.
+        #[arg(short, long)]
+        flip: bool,
+    },
     /// Find the longest shortest path starting at an article.
     LongestShortestPath { from: String },
     /// Analyze articles using "Philosophy Game" rules.
@@ -74,6 +82,13 @@ fn main() -> io::Result<()> {
                 commands::path::path(&args.datafile, &from, &to)
             }
         }
+        Command::PathPetgraph { from, to, flip } => {
+            if flip {
+                commands::path_petgraph::path(&args.datafile, &to, &from)
+            } else {
+                commands::path_petgraph::path(&args.datafile, &from, &to)
+            }
+        }
         Command::LongestShortestPath { from } => {
             commands::longest_shortest_path::run(&args.datafile, &from)
         }