Remove adjlist-based path implementation

This commit is contained in:
Joscha 2024-12-30 19:34:33 +01:00
parent 18e54c4ce1
commit f819f5bf69
4 changed files with 55 additions and 230 deletions

View file

@ -3,6 +3,5 @@ pub mod list_links;
pub mod list_pages; pub mod list_pages;
pub mod longest_shortest_path; pub mod longest_shortest_path;
pub mod path; pub mod path;
pub mod path2;
pub mod philosophy_game; pub mod philosophy_game;
pub mod reexport; pub mod reexport;

View file

@ -1,158 +1,76 @@
use std::collections::BinaryHeap; use std::{
use std::fs::File; fs::File,
use std::io::{self, BufReader}; io::{self, BufReader},
use std::path::Path; path::Path,
};
use crate::data::adjacency_list::AdjacencyList; use crate::{
use crate::data::info::{LinkInfo, PageInfo}; algo::Dijkstra,
use crate::data::store; data::{info::PageInfo, store},
use crate::util; graph::{Graph, NodeIdx},
util,
};
struct DijkstraPageInfo { pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx {
cost: u32, let title = util::normalize_link(title);
prev: u32, pages
redirect: bool, .iter()
.enumerate()
.find(|(_, p)| util::normalize_link(&p.title) == title)
.map(|(i, _)| NodeIdx::new(i))
.expect("invalid title")
} }
impl DijkstraPageInfo { pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
fn from_page_info(info: PageInfo) -> Self { loop {
Self { if pages[page.usize()].redirect {
cost: u32::MAX, if let Some(next) = graph.edges_for(page).first() {
prev: u32::MAX, page = *next;
redirect: info.redirect, continue;
}
}
}
struct DijkstraLinkInfo {
cost: u32,
}
impl DijkstraLinkInfo {
fn from_link_info(info: LinkInfo) -> Self {
Self {
cost: 1,
// cost: 1000 + info.start,
// cost: 10000 + info.start,
// cost: 1000 + info.start / 10,
}
}
}
#[derive(Clone, Copy, PartialEq, Eq)]
struct Entry {
cost: u32,
page_idx: u32,
}
impl Entry {
pub fn new(cost: u32, page_idx: u32) -> Self {
Self { cost, page_idx }
}
}
// Manual implementation so the queue is a min-heap instead of a max-heap.
impl Ord for Entry {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
other
.cost
.cmp(&self.cost)
.then_with(|| self.page_idx.cmp(&other.page_idx))
}
}
impl PartialOrd for Entry {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
/// Closely matches the dijkstra example in [std::collections::binary_heap].
fn dijkstra(data: AdjacencyList<PageInfo, LinkInfo>, from: u32, to: u32) -> Option<Vec<u32>> {
println!("> Prepare state");
let mut data = data
.change_page_data(DijkstraPageInfo::from_page_info)
.change_link_data(DijkstraLinkInfo::from_link_info);
let mut queue = BinaryHeap::new();
data.page_mut(from).data.cost = 0;
queue.push(Entry::new(0, from));
println!("> Run dijkstra");
while let Some(Entry { cost, page_idx }) = queue.pop() {
if page_idx == to {
// We've found the shortest path to our target
break;
}
let page = data.page(page_idx);
if cost > page.data.cost {
// This queue entry is outdated
continue;
}
let redirect = page.data.redirect;
for link_idx in data.link_range(page_idx) {
let link = data.link(link_idx);
let next = Entry {
cost: cost + if redirect { 0 } else { link.data.cost },
page_idx: link.to,
};
let target_page = data.page_mut(link.to);
if next.cost < target_page.data.cost {
target_page.data.cost = next.cost;
target_page.data.prev = page_idx;
queue.push(next);
} }
} }
}
println!("> Collect results"); return page;
let mut steps = vec![];
let mut at = to;
loop {
steps.push(at);
at = data.page(at).data.prev;
if at == u32::MAX {
break;
};
}
steps.reverse();
if steps.first() == Some(&from) {
Some(steps)
} else {
None
} }
} }
pub fn path(datafile: &Path, from: &str, to: &str) -> io::Result<()> { pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
println!(">> Import"); println!(">> Import");
let mut databuf = BufReader::new(File::open(datafile)?); let mut databuf = BufReader::new(File::open(datafile)?);
let data = store::read_adjacency_list(&mut databuf)?; let (pages, _links, graph) = store::read_graph(&mut databuf)?;
let pages = data.pages.clone();
println!(">> Locate from and to"); println!(">> Locate from and to");
let from_idx = util::resolve_redirects(&data, util::find_index_of_title(&pages, from)); let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start));
let to_idx = util::resolve_redirects(&data, util::find_index_of_title(&pages, to)); let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal));
println!("From: {:?}", data.page(from_idx).data.title); println!("Start: {:?}", pages[start.usize()].title);
println!("To: {:?}", data.page(to_idx).data.title); println!("Goal: {:?}", pages[goal.usize()].title);
println!(">> Find path"); println!(">> Find path");
let path = dijkstra(data, from_idx, to_idx); println!("> Preparing dijkstra");
let mut dijkstra = Dijkstra::new(&graph);
println!("> Running dijkstra");
dijkstra.run(
start,
|node| node == goal,
|source, _edge, _target| !pages[source.usize()].redirect as u32,
);
if let Some(path) = path { if dijkstra.cost(goal) == u32::MAX {
println!("Path found:");
for page_idx in path {
let page = &pages[page_idx as usize];
if page.data.redirect {
println!(" v {:?}", page.data.title);
} else {
println!(" - {:?}", page.data.title);
}
}
} else {
println!("No path found"); println!("No path found");
return Ok(());
}
println!("> Collecting path");
let path = dijkstra.path(goal);
let cost = dijkstra.cost(goal);
println!("Path found (cost {cost}, length {}):", path.len());
for page in path {
let info = &pages[page.usize()];
if info.redirect {
println!(" v {:?}", info.title);
} else {
println!(" - {:?}", info.title);
}
} }
Ok(()) Ok(())

View file

@ -1,77 +0,0 @@
use std::{
fs::File,
io::{self, BufReader},
path::Path,
};
use crate::{
algo::Dijkstra,
data::{info::PageInfo, store},
graph::{Graph, NodeIdx},
util,
};
/// Returns the index of the first page whose normalized title equals the
/// normalized form of `title`.
///
/// Both the query and every candidate title go through
/// `util::normalize_link` before being compared.
///
/// # Panics
///
/// Panics with the message `invalid title` if no page matches.
pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx {
    let wanted = util::normalize_link(title);
    let position = pages
        .iter()
        .position(|info| util::normalize_link(&info.title) == wanted);
    match position {
        Some(index) => NodeIdx::new(index),
        None => panic!("invalid title"),
    }
}
/// Follows redirect pages until a non-redirect page is reached.
///
/// Starting at `page`, as long as the current page is flagged as a redirect
/// in `pages`, the walk moves on to the first edge `graph` lists for it
/// (presumably the redirect target; confirm against the graph builder).
///
/// The walk stops and returns the current page when:
/// - the page is not a redirect,
/// - the page is a redirect without any outgoing edge, or
/// - `pages.len()` hops have been taken, which can only happen if the
///   redirects form a cycle. The unbounded version would spin forever on
///   such data; here the page reached at that point is returned instead.
pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
    // A chain that never revisits a page takes fewer than `pages.len()` hops,
    // so this bound does not change the result for cycle-free data.
    for _ in 0..pages.len() {
        if !pages[page.usize()].redirect {
            break;
        }
        match graph.edges_for(page).first() {
            Some(next) => page = *next,
            // Redirect with no outgoing edge: nothing left to follow.
            None => break,
        }
    }
    page
}
/// Finds and prints a cheapest path between two articles.
///
/// Loads pages and graph from `datafile`, looks up `start` and `goal` by
/// title, resolves redirects on both, and runs Dijkstra from the start page
/// until the goal page is reached. Edges leaving a redirect page cost 0 and
/// all other edges cost 1. Progress and results go to stdout; redirect pages
/// on the path are marked with `v`, all other pages with `-`.
///
/// # Errors
///
/// Returns any I/O error from opening `datafile` or from `store::read_graph`.
///
/// # Panics
///
/// Panics if `start` or `goal` does not match any page title.
pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
    println!(">> Import");
    let file = File::open(datafile)?;
    let mut reader = BufReader::new(file);
    let (pages, _links, graph) = store::read_graph(&mut reader)?;

    println!(">> Locate from and to");
    let start_idx = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start));
    let goal_idx = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal));
    println!("Start: {:?}", pages[start_idx.usize()].title);
    println!("Goal: {:?}", pages[goal_idx.usize()].title);

    println!(">> Find path");
    println!("> Preparing dijkstra");
    let mut search = Dijkstra::new(&graph);

    println!("> Running dijkstra");
    search.run(
        start_idx,
        |node| node == goal_idx,
        // Leaving a redirect page is free; every other edge costs one step.
        |source, _edge, _target| u32::from(!pages[source.usize()].redirect),
    );

    // A cost of `u32::MAX` is treated as "goal was never reached".
    let total_cost = search.cost(goal_idx);
    if total_cost == u32::MAX {
        println!("No path found");
        return Ok(());
    }

    println!("> Collecting path");
    let route = search.path(goal_idx);
    println!("Path found (cost {total_cost}, length {}):", route.len());
    for node in route {
        let info = &pages[node.usize()];
        let marker = if info.redirect { 'v' } else { '-' };
        println!(" {marker} {:?}", info.title);
    }

    Ok(())
}

View file

@ -40,14 +40,6 @@ enum Command {
#[arg(short, long)] #[arg(short, long)]
flip: bool, flip: bool,
}, },
/// Find a path from one article to another.
Path2 {
from: String,
to: String,
/// Flip start and end article.
#[arg(short, long)]
flip: bool,
},
/// Find the longest shortest path starting at an article. /// Find the longest shortest path starting at an article.
LongestShortestPath { LongestShortestPath {
from: String, from: String,
@ -90,13 +82,6 @@ fn main() -> io::Result<()> {
commands::path::path(&args.datafile, &from, &to) commands::path::path(&args.datafile, &from, &to)
} }
} }
Command::Path2 { from, to, flip } => {
if flip {
commands::path2::path(&args.datafile, &to, &from)
} else {
commands::path2::path(&args.datafile, &from, &to)
}
}
Command::LongestShortestPath { from } => { Command::LongestShortestPath { from } => {
commands::longest_shortest_path::run(&args.datafile, &from) commands::longest_shortest_path::run(&args.datafile, &from)
} }