From f819f5bf698830ac10bfd720f19bc932587a614e Mon Sep 17 00:00:00 2001 From: Joscha Date: Mon, 30 Dec 2024 19:34:33 +0100 Subject: [PATCH] Remove adjlist-based path implementation --- brood/src/commands.rs | 1 - brood/src/commands/path.rs | 192 +++++++++++------------------------- brood/src/commands/path2.rs | 77 --------------- brood/src/main.rs | 15 --- 4 files changed, 55 insertions(+), 230 deletions(-) delete mode 100644 brood/src/commands/path2.rs diff --git a/brood/src/commands.rs b/brood/src/commands.rs index d4b8155..6da3050 100644 --- a/brood/src/commands.rs +++ b/brood/src/commands.rs @@ -3,6 +3,5 @@ pub mod list_links; pub mod list_pages; pub mod longest_shortest_path; pub mod path; -pub mod path2; pub mod philosophy_game; pub mod reexport; diff --git a/brood/src/commands/path.rs b/brood/src/commands/path.rs index 82079d2..55c72ed 100644 --- a/brood/src/commands/path.rs +++ b/brood/src/commands/path.rs @@ -1,158 +1,76 @@ -use std::collections::BinaryHeap; -use std::fs::File; -use std::io::{self, BufReader}; -use std::path::Path; +use std::{ + fs::File, + io::{self, BufReader}, + path::Path, +}; -use crate::data::adjacency_list::AdjacencyList; -use crate::data::info::{LinkInfo, PageInfo}; -use crate::data::store; -use crate::util; +use crate::{ + algo::Dijkstra, + data::{info::PageInfo, store}, + graph::{Graph, NodeIdx}, + util, +}; -struct DijkstraPageInfo { - cost: u32, - prev: u32, - redirect: bool, +pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx { + let title = util::normalize_link(title); + pages + .iter() + .enumerate() + .find(|(_, p)| util::normalize_link(&p.title) == title) + .map(|(i, _)| NodeIdx::new(i)) + .expect("invalid title") } -impl DijkstraPageInfo { - fn from_page_info(info: PageInfo) -> Self { - Self { - cost: u32::MAX, - prev: u32::MAX, - redirect: info.redirect, - } - } -} - -struct DijkstraLinkInfo { - cost: u32, -} - -impl DijkstraLinkInfo { - fn from_link_info(info: LinkInfo) -> Self { - Self { - cost: 1, - // cost: 1000 + info.start, - // cost: 10000 + info.start, - // cost: 1000 + info.start / 10, - } - } -} - -#[derive(Clone, Copy, PartialEq, Eq)] -struct Entry { - cost: u32, - page_idx: u32, -} - -impl Entry { - pub fn new(cost: u32, page_idx: u32) -> Self { - Self { cost, page_idx } - } -} - -// Manual implementation so the queue is a min-heap instead of a max-heap. -impl Ord for Entry { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - other - .cost - .cmp(&self.cost) - .then_with(|| self.page_idx.cmp(&other.page_idx)) - } -} - -impl PartialOrd for Entry { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -/// Closely matches the dijkstra example in [std::collections::binary_heap]. -fn dijkstra(data: AdjacencyList, from: u32, to: u32) -> Option> { - println!("> Prepare state"); - let mut data = data - .change_page_data(DijkstraPageInfo::from_page_info) - .change_link_data(DijkstraLinkInfo::from_link_info); - let mut queue = BinaryHeap::new(); - data.page_mut(from).data.cost = 0; - queue.push(Entry::new(0, from)); - - println!("> Run dijkstra"); - while let Some(Entry { cost, page_idx }) = queue.pop() { - if page_idx == to { - // We've found the shortest path to our target - break; - } - - let page = data.page(page_idx); - if cost > page.data.cost { - // This queue entry is outdated - continue; - } - - let redirect = page.data.redirect; - for link_idx in data.link_range(page_idx) { - let link = data.link(link_idx); - - let next = Entry { - cost: cost + if redirect { 0 } else { link.data.cost }, - page_idx: link.to, - }; - - let target_page = data.page_mut(link.to); - if next.cost < target_page.data.cost { - target_page.data.cost = next.cost; - target_page.data.prev = page_idx; - queue.push(next); +pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx { + loop { + if pages[page.usize()].redirect { + if let Some(next) = graph.edges_for(page).first() { + page = *next; + continue; } } - } - println!("> Collect results"); - let mut steps = vec![]; - let mut at = to; - loop { - steps.push(at); - at = data.page(at).data.prev; - if at == u32::MAX { - break; - }; - } - steps.reverse(); - if steps.first() == Some(&from) { - Some(steps) - } else { - None + return page; } } -pub fn path(datafile: &Path, from: &str, to: &str) -> io::Result<()> { +pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> { println!(">> Import"); let mut databuf = BufReader::new(File::open(datafile)?); - let data = store::read_adjacency_list(&mut databuf)?; - let pages = data.pages.clone(); + let (pages, _links, graph) = store::read_graph(&mut databuf)?; println!(">> Locate from and to"); - let from_idx = util::resolve_redirects(&data, util::find_index_of_title(&pages, from)); - let to_idx = util::resolve_redirects(&data, util::find_index_of_title(&pages, to)); - println!("From: {:?}", data.page(from_idx).data.title); - println!("To: {:?}", data.page(to_idx).data.title); + let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start)); + let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal)); + println!("Start: {:?}", pages[start.usize()].title); + println!("Goal: {:?}", pages[goal.usize()].title); println!(">> Find path"); - let path = dijkstra(data, from_idx, to_idx); + println!("> Preparing dijkstra"); + let mut dijkstra = Dijkstra::new(&graph); + println!("> Running dijkstra"); + dijkstra.run( + start, + |node| node == goal, + |source, _edge, _target| !pages[source.usize()].redirect as u32, + ); - if let Some(path) = path { - println!("Path found:"); - for page_idx in path { - let page = &pages[page_idx as usize]; - if page.data.redirect { - println!(" v {:?}", page.data.title); - } else { - println!(" - {:?}", page.data.title); - } - } - } else { + if dijkstra.cost(goal) == u32::MAX { println!("No path found"); + return Ok(()); + } + + println!("> Collecting path"); + let path = dijkstra.path(goal); + let cost = dijkstra.cost(goal); + println!("Path found (cost {cost}, length {}):", path.len()); + for page in path { + let info = &pages[page.usize()]; + if info.redirect { + println!(" v {:?}", info.title); + } else { + println!(" - {:?}", info.title); + } } Ok(()) diff --git a/brood/src/commands/path2.rs b/brood/src/commands/path2.rs deleted file mode 100644 index 55c72ed..0000000 --- a/brood/src/commands/path2.rs +++ /dev/null @@ -1,77 +0,0 @@ -use std::{ - fs::File, - io::{self, BufReader}, - path::Path, -}; - -use crate::{ - algo::Dijkstra, - data::{info::PageInfo, store}, - graph::{Graph, NodeIdx}, - util, -}; - -pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx { - let title = util::normalize_link(title); - pages - .iter() - .enumerate() - .find(|(_, p)| util::normalize_link(&p.title) == title) - .map(|(i, _)| NodeIdx::new(i)) - .expect("invalid title") -} - -pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx { - loop { - if pages[page.usize()].redirect { - if let Some(next) = graph.edges_for(page).first() { - page = *next; - continue; - } - } - - return page; - } -} - -pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> { - println!(">> Import"); - let mut databuf = BufReader::new(File::open(datafile)?); - let (pages, _links, graph) = store::read_graph(&mut databuf)?; - - println!(">> Locate from and to"); - let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start)); - let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal)); - println!("Start: {:?}", pages[start.usize()].title); - println!("Goal: {:?}", pages[goal.usize()].title); - - println!(">> Find path"); - println!("> Preparing dijkstra"); - let mut dijkstra = Dijkstra::new(&graph); - println!("> Running dijkstra"); - dijkstra.run( - start, - |node| node == goal, - |source, _edge, _target| !pages[source.usize()].redirect as u32, - ); - - if dijkstra.cost(goal) == u32::MAX { - println!("No path found"); - return Ok(()); - } - - println!("> Collecting path"); - let path = dijkstra.path(goal); - let cost = dijkstra.cost(goal); - println!("Path found (cost {cost}, length {}):", path.len()); - for page in path { - let info = &pages[page.usize()]; - if info.redirect { - println!(" v {:?}", info.title); - } else { - println!(" - {:?}", info.title); - } - } - - Ok(()) -} diff --git a/brood/src/main.rs b/brood/src/main.rs index 501540b..3b93e2e 100644 --- a/brood/src/main.rs +++ b/brood/src/main.rs @@ -40,14 +40,6 @@ enum Command { #[arg(short, long)] flip: bool, }, - /// Find a path from one article to another. - Path2 { - from: String, - to: String, - /// Flip start and end article. - #[arg(short, long)] - flip: bool, - }, /// Find the longest shortest path starting at an article. LongestShortestPath { from: String, @@ -90,13 +82,6 @@ fn main() -> io::Result<()> { commands::path::path(&args.datafile, &from, &to) } } - Command::Path2 { from, to, flip } => { - if flip { - commands::path2::path(&args.datafile, &to, &from) - } else { - commands::path2::path(&args.datafile, &from, &to) - } - } Command::LongestShortestPath { from } => { commands::longest_shortest_path::run(&args.datafile, &from) }