From 4e41084f2a44477a5845feb5a90db7c17c63a027 Mon Sep 17 00:00:00 2001 From: Joscha Date: Tue, 31 Dec 2024 02:48:28 +0100 Subject: [PATCH] Port path command --- brood/src/commands.rs | 1 + brood/src/commands/path.rs | 114 ++++++++++++++++--------------------- brood/src/main.rs | 2 + brood/src/util.rs | 38 ++++++++----- 4 files changed, 77 insertions(+), 78 deletions(-) diff --git a/brood/src/commands.rs b/brood/src/commands.rs index b3ac910..2e77470 100644 --- a/brood/src/commands.rs +++ b/brood/src/commands.rs @@ -1 +1,2 @@ pub mod ingest; +pub mod path; diff --git a/brood/src/commands/path.rs b/brood/src/commands/path.rs index 55c72ed..7a5dcb9 100644 --- a/brood/src/commands/path.rs +++ b/brood/src/commands/path.rs @@ -1,77 +1,61 @@ -use std::{ - fs::File, - io::{self, BufReader}, - path::Path, -}; +use std::{io, path::Path}; use crate::{ algo::Dijkstra, - data::{info::PageInfo, store}, - graph::{Graph, NodeIdx}, - util, + data, + util::{self, TitleNormalizer}, }; -pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx { - let title = util::normalize_link(title); - pages - .iter() - .enumerate() - .find(|(_, p)| util::normalize_link(&p.title) == title) - .map(|(i, _)| NodeIdx::new(i)) - .expect("invalid title") +/// Find the shortest path between two articles. 
+#[derive(Debug, clap::Parser)] +pub struct Cmd { + start: String, + goal: String, } -pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx { - loop { - if pages[page.usize()].redirect { - if let Some(next) = graph.edges_for(page).first() { - page = *next; - continue; +impl Cmd { + pub fn run(self, data: &Path) -> io::Result<()> { + let normalizer = TitleNormalizer::new(); + + println!(">> Import"); + let (pages, _links, graph) = data::read_from_file(data)?; + + println!(">> Resolve articles"); + let start = util::resolve_title(&normalizer, &pages, &graph, &self.start); + let goal = util::resolve_title(&normalizer, &pages, &graph, &self.goal); + println!("Start: {}", pages[start.usize()].title); + println!("Goal: {}", pages[goal.usize()].title); + + println!(">> Find path"); + println!("> Preparing dijkstra"); + let mut dijkstra = Dijkstra::new(&graph); + println!("> Running dijkstra"); + dijkstra.run( + start, + |node| node == goal, + |source, _edge, _target| !pages[source.usize()].redirect as u32, + ); + + if dijkstra.cost(goal) == u32::MAX { + println!("No path found"); + return Ok(()); + } + + println!("> Collecting path"); + let path = dijkstra.path(goal); + let cost = dijkstra.cost(goal); + + println!(); + println!("Path found (cost {cost}, length {}):", path.len()); + for page in path { + let info = &pages[page.usize()]; + if info.redirect { + println!("v {:?}", info.title); + } else { + println!("- {:?}", info.title); } } - return page; + Ok(()) } } - -pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> { - println!(">> Import"); - let mut databuf = BufReader::new(File::open(datafile)?); - let (pages, _links, graph) = store::read_graph(&mut databuf)?; - - println!(">> Locate from and to"); - let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start)); - let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal)); - println!("Start: {:?}", 
pages[start.usize()].title); - println!("Goal: {:?}", pages[goal.usize()].title); - - println!(">> Find path"); - println!("> Preparing dijkstra"); - let mut dijkstra = Dijkstra::new(&graph); - println!("> Running dijkstra"); - dijkstra.run( - start, - |node| node == goal, - |source, _edge, _target| !pages[source.usize()].redirect as u32, - ); - - if dijkstra.cost(goal) == u32::MAX { - println!("No path found"); - return Ok(()); - } - - println!("> Collecting path"); - let path = dijkstra.path(goal); - let cost = dijkstra.cost(goal); - println!("Path found (cost {cost}, length {}):", path.len()); - for page in path { - let info = &pages[page.usize()]; - if info.redirect { - println!(" v {:?}", info.title); - } else { - println!(" - {:?}", info.title); - } - } - - Ok(()) -} diff --git a/brood/src/main.rs b/brood/src/main.rs index 45bff55..c31b1f4 100644 --- a/brood/src/main.rs +++ b/brood/src/main.rs @@ -11,6 +11,7 @@ use clap::Parser; #[derive(Debug, Parser)] enum Command { Ingest(commands::ingest::Cmd), + Path(commands::path::Cmd), } #[derive(Debug, Parser)] @@ -24,5 +25,6 @@ fn main() -> io::Result<()> { let args = Args::parse(); match args.command { Command::Ingest(cmd) => cmd.run(&args.datafile), + Command::Path(cmd) => cmd.run(&args.datafile), } } diff --git a/brood/src/util.rs b/brood/src/util.rs index 1cc1ab8..f594058 100644 --- a/brood/src/util.rs +++ b/brood/src/util.rs @@ -1,8 +1,13 @@ -use std::{fmt, iter, time::Instant}; +use std::{fmt, iter, time::Instant}; use regex::Regex; use thousands::Separable; +use crate::{ + data::Page, + graph::{Graph, NodeIdx}, +}; + pub struct Counter { n: usize, last_print: Instant, @@ -145,27 +150,34 @@ impl TitleNormalizer { } } -/* -pub fn find_index_of_title(pages: &[Page], title: &str) -> u32 { - let title = normalize_link(title); pages .iter() .enumerate() - 
.find(|(_, p)| normalize_link(&p.data.title) == title) - .map(|(i, _)| i) - .expect("invalid title") as u32 + .find(|(_, p)| normalizer.normalize(&p.title) == normalized) + .map(|(i, _)| NodeIdx::new(i)) + .expect("invalid title") } -pub fn resolve_redirects(data: &AdjacencyList, mut page_idx: u32) -> u32 { +pub fn resolve_redirects(pages: &[Page], graph: &Graph, mut page: NodeIdx) -> NodeIdx { loop { - if data.page(page_idx).data.redirect { - if let Some(link_idx) = data.link_redirect(page_idx) { - page_idx = data.link(link_idx).to; + if pages[page.usize()].redirect { + if let Some(target) = graph.edges_for(page).first() { + page = *target; continue; } } - return page_idx; + return page; } } -*/ + +pub fn resolve_title( + normalizer: &TitleNormalizer, + pages: &[Page], + graph: &Graph, + title: &str, +) -> NodeIdx { + resolve_redirects(pages, graph, locate_title(normalizer, pages, title)) +}