Implement new graph data structure and dijkstra

This commit is contained in:
Joscha 2024-12-30 18:19:20 +01:00
parent 778cb6748d
commit 18e54c4ce1
6 changed files with 516 additions and 3 deletions

77
brood/src/algo.rs Normal file
View file

@ -0,0 +1,77 @@
use std::{cmp::Reverse, collections::BinaryHeap};
use crate::graph::{EdgeIdx, Graph, NodeIdx};
/// Single-source shortest-path search (Dijkstra's algorithm) over a [`Graph`].
///
/// The per-node results are kept inside the struct so that they can be queried
/// through [`Dijkstra::cost`], [`Dijkstra::pred`] and [`Dijkstra::path`] after
/// [`Dijkstra::run`] has returned.
pub struct Dijkstra<'a> {
    /// The graph being searched.
    graph: &'a Graph,
    /// Best known cost from the start node, indexed by node.
    /// `u32::MAX` means the node has not been reached.
    cost: Vec<u32>,
    /// Predecessor on the best known path, indexed by node.
    /// `NodeIdx::NONE` means the node has no recorded predecessor.
    pred: Vec<NodeIdx>,
}

impl<'a> Dijkstra<'a> {
    /// Creates a search over `graph` with every node marked as unreached.
    pub fn new(graph: &'a Graph) -> Self {
        Self {
            graph,
            cost: vec![u32::MAX; graph.nodes.len()],
            pred: vec![NodeIdx::NONE; graph.nodes.len()],
        }
    }

    /// Runs the search from `start` until a node satisfying `goal` is popped
    /// from the queue or the queue runs empty.
    ///
    /// `cost(source, edge, target)` yields the cost of traversing `edge`.
    ///
    /// NOTE: the cost and predecessor tables are not cleared here, so results
    /// of an earlier `run` on the same instance remain in place. Use a fresh
    /// instance for every independent search.
    ///
    /// # Panics
    ///
    /// Panics if `start` is not a valid node index of the graph.
    pub fn run(
        &mut self,
        start: NodeIdx,
        goal: impl Fn(NodeIdx) -> bool,
        cost: impl Fn(NodeIdx, EdgeIdx, NodeIdx) -> u32,
    ) {
        self.cost[start.usize()] = 0;

        // `Reverse` turns the max-heap into a min-heap ordered by cost.
        let mut queue = BinaryHeap::new();
        queue.push((Reverse(0u32), start));

        while let Some((Reverse(curr_cost), curr)) = queue.pop() {
            if goal(curr) {
                break; // We've found the shortest path to our target
            }

            // The heap has no decrease-key operation, so a node is pushed again
            // whenever a cheaper route to it is found. The older, more
            // expensive entries stay in the heap; skip them instead of
            // scanning all of the node's edges a second time.
            if curr_cost > self.cost[curr.usize()] {
                continue; // Outdated entry
            }

            for edge in self.graph.edge_range(curr).map(EdgeIdx::new) {
                let next = self.graph.edges[edge.usize()];
                // Saturate instead of overflowing. A saturated cost equals the
                // "unreached" marker `u32::MAX` and therefore can never pass
                // the strict `<` comparison below.
                let next_cost = curr_cost.saturating_add(cost(curr, edge, next));
                if next_cost < self.cost[next.usize()] {
                    self.cost[next.usize()] = next_cost;
                    self.pred[next.usize()] = curr;
                    queue.push((Reverse(next_cost), next));
                }
            }
        }
    }

    /// Returns the best known cost of reaching `node`, or `u32::MAX` if the
    /// node has not been reached.
    #[inline]
    pub fn cost(&self, node: NodeIdx) -> u32 {
        self.cost[node.usize()]
    }

    /// Returns the predecessor of `node` on the best known path, or
    /// `NodeIdx::NONE` if none was recorded.
    #[inline]
    pub fn pred(&self, node: NodeIdx) -> NodeIdx {
        self.pred[node.usize()]
    }

    /// Follows the predecessor links backwards from `goal` and returns the
    /// visited nodes in forward order (ending with `goal`).
    ///
    /// If `goal` has no recorded predecessor the result is just `[goal]`;
    /// check [`Dijkstra::cost`] first to tell "unreached" from "is the start".
    pub fn path(&self, goal: NodeIdx) -> Vec<NodeIdx> {
        let mut path = vec![];
        let mut at = goal;
        loop {
            path.push(at);
            at = self.pred(at);
            if at == NodeIdx::NONE {
                break;
            }
        }
        path.reverse();
        path
    }
}

View file

@ -3,5 +3,6 @@ pub mod list_links;
pub mod list_pages;
pub mod longest_shortest_path;
pub mod path;
pub mod path2;
pub mod philosophy_game;
pub mod reexport;

View file

@ -0,0 +1,77 @@
use std::{
fs::File,
io::{self, BufReader},
path::Path,
};
use crate::{
algo::Dijkstra,
data::{info::PageInfo, store},
graph::{Graph, NodeIdx},
util,
};
/// Returns the index of the first page whose normalized title equals the
/// normalized form of `title`.
///
/// # Panics
///
/// Panics with `"invalid title"` if no page matches.
pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx {
    let wanted = util::normalize_link(title);
    let position = pages
        .iter()
        .position(|page| util::normalize_link(&page.title) == wanted);
    NodeIdx::new(position.expect("invalid title"))
}
/// Follows redirect pages starting at `page` and returns the page the chain
/// ends on.
///
/// A page flagged as `redirect` is replaced by the target of its first
/// outgoing edge. The walk stops at the first page that is not a redirect or
/// that has no outgoing edges.
///
/// Redirect cycles (including a page redirecting to itself) do not hang: the
/// walk gives up after `pages.len()` steps and returns the page it is on at
/// that point, which is then still a redirect page.
pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
    // A chain without a cycle visits every page at most once, so inspecting
    // `pages.len()` pages is always enough to reach its end.
    for _ in 0..pages.len() {
        if !pages[page.usize()].redirect {
            return page;
        }
        match graph.edges_for(page).first() {
            Some(next) => page = *next,
            None => return page,
        }
    }
    page
}
pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
println!(">> Import");
let mut databuf = BufReader::new(File::open(datafile)?);
let (pages, _links, graph) = store::read_graph(&mut databuf)?;
println!(">> Locate from and to");
let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start));
let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal));
println!("Start: {:?}", pages[start.usize()].title);
println!("Goal: {:?}", pages[goal.usize()].title);
println!(">> Find path");
println!("> Preparing dijkstra");
let mut dijkstra = Dijkstra::new(&graph);
println!("> Running dijkstra");
dijkstra.run(
start,
|node| node == goal,
|source, _edge, _target| !pages[source.usize()].redirect as u32,
);
if dijkstra.cost(goal) == u32::MAX {
println!("No path found");
return Ok(());
}
println!("> Collecting path");
let path = dijkstra.path(goal);
let cost = dijkstra.cost(goal);
println!("Path found (cost {cost}, length {}):", path.len());
for page in path {
let info = &pages[page.usize()];
if info.redirect {
println!(" v {:?}", info.title);
} else {
println!(" - {:?}", info.title);
}
}
Ok(())
}

View file

@ -1,5 +1,7 @@
use std::io::{self, Read, Write};
use crate::graph::{EdgeIdx, Graph, NodeIdx};
use super::{
adjacency_list::{AdjacencyList, Link, Page},
info::{LinkInfo, PageInfo},
@ -132,3 +134,27 @@ pub fn read_adjacency_list<R: Read>(from: &mut R) -> io::Result<AdjacencyList<Pa
Ok(AdjacencyList { pages, links })
}
/// Reads page and link records from `from` and builds the corresponding
/// [`Graph`] alongside the per-page and per-link payloads.
///
/// The stream starts with two `u32` counts (pages, then links), followed by
/// that many page records and then that many link records. Each page
/// contributes its `start` value as a node pointer, each link its `to` value
/// as an edge target.
///
/// # Errors
///
/// Propagates any I/O error from the underlying reads.
///
/// # Panics
///
/// Panics if the assembled graph fails `Graph::check_consistency`.
pub fn read_graph(from: &mut impl Read) -> io::Result<(Vec<PageInfo>, Vec<LinkInfo>, Graph)> {
    let page_count = read_u32(from)? as usize;
    let link_count = read_u32(from)? as usize;

    let mut graph = Graph::with_capacity(page_count, link_count);
    let mut page_infos = Vec::with_capacity(page_count);
    let mut link_infos = Vec::with_capacity(link_count);

    for _ in 0..page_count {
        let record = read_page(from)?;
        graph.nodes.push(EdgeIdx(record.start));
        page_infos.push(record.data);
    }

    for _ in 0..link_count {
        let record = read_link(from)?;
        graph.edges.push(NodeIdx(record.to));
        link_infos.push(record.data);
    }

    graph.check_consistency();
    Ok((page_infos, link_infos, graph))
}

293
brood/src/graph.rs Normal file
View file

@ -0,0 +1,293 @@
use std::ops::{Add, AddAssign, Range, Sub, SubAssign};
/// Index of a node, stored as a `u32`.
///
/// Arithmetic uses the plain `+` and `-` operators on the wrapped value, so
/// overflow behaves exactly like overflow on a bare `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct NodeIdx(pub u32);

impl NodeIdx {
    /// Marker value (`u32::MAX`) used where no node is meant.
    pub const NONE: Self = NodeIdx(u32::MAX);

    /// Wraps `value`, converting it to `u32` with an `as` cast.
    #[inline]
    pub const fn new(value: usize) -> Self {
        NodeIdx(value as u32)
    }

    /// Returns the wrapped value as a `usize`, e.g. for slice indexing.
    #[inline]
    pub const fn usize(self) -> usize {
        let NodeIdx(raw) = self;
        raw as usize
    }
}

impl From<u32> for NodeIdx {
    fn from(value: u32) -> Self {
        NodeIdx(value)
    }
}

impl From<usize> for NodeIdx {
    fn from(value: usize) -> Self {
        NodeIdx(value as u32)
    }
}

// Index-with-index arithmetic is expressed through the `u32` variants below.

impl Add for NodeIdx {
    type Output = NodeIdx;

    fn add(self, rhs: NodeIdx) -> NodeIdx {
        self + rhs.0
    }
}

impl AddAssign for NodeIdx {
    fn add_assign(&mut self, rhs: NodeIdx) {
        *self = *self + rhs;
    }
}

impl Sub for NodeIdx {
    type Output = NodeIdx;

    fn sub(self, rhs: NodeIdx) -> NodeIdx {
        self - rhs.0
    }
}

impl SubAssign for NodeIdx {
    fn sub_assign(&mut self, rhs: NodeIdx) {
        *self = *self - rhs;
    }
}

impl Add<u32> for NodeIdx {
    type Output = NodeIdx;

    fn add(self, rhs: u32) -> NodeIdx {
        let NodeIdx(raw) = self;
        NodeIdx(raw + rhs)
    }
}

impl AddAssign<u32> for NodeIdx {
    fn add_assign(&mut self, rhs: u32) {
        *self = *self + rhs;
    }
}

impl Sub<u32> for NodeIdx {
    type Output = NodeIdx;

    fn sub(self, rhs: u32) -> NodeIdx {
        let NodeIdx(raw) = self;
        NodeIdx(raw - rhs)
    }
}

impl SubAssign<u32> for NodeIdx {
    fn sub_assign(&mut self, rhs: u32) {
        *self = *self - rhs;
    }
}
/// Index of an edge, stored as a `u32`.
///
/// Arithmetic uses the plain `+` and `-` operators on the wrapped value, so
/// overflow behaves exactly like overflow on a bare `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct EdgeIdx(pub u32);

impl EdgeIdx {
    /// Marker value (`u32::MAX`) used where no edge is meant.
    pub const NONE: Self = EdgeIdx(u32::MAX);

    /// Wraps `value`, converting it to `u32` with an `as` cast.
    #[inline]
    pub const fn new(value: usize) -> Self {
        EdgeIdx(value as u32)
    }

    /// Returns the wrapped value as a `usize`, e.g. for slice indexing.
    #[inline]
    pub const fn usize(self) -> usize {
        let EdgeIdx(raw) = self;
        raw as usize
    }
}

impl From<u32> for EdgeIdx {
    fn from(value: u32) -> Self {
        EdgeIdx(value)
    }
}

impl From<usize> for EdgeIdx {
    fn from(value: usize) -> Self {
        EdgeIdx(value as u32)
    }
}

// Index-with-index arithmetic is expressed through the `u32` variants below.

impl Add for EdgeIdx {
    type Output = EdgeIdx;

    fn add(self, rhs: EdgeIdx) -> EdgeIdx {
        self + rhs.0
    }
}

impl AddAssign for EdgeIdx {
    fn add_assign(&mut self, rhs: EdgeIdx) {
        *self = *self + rhs;
    }
}

impl Sub for EdgeIdx {
    type Output = EdgeIdx;

    fn sub(self, rhs: EdgeIdx) -> EdgeIdx {
        self - rhs.0
    }
}

impl SubAssign for EdgeIdx {
    fn sub_assign(&mut self, rhs: EdgeIdx) {
        *self = *self - rhs;
    }
}

impl Add<u32> for EdgeIdx {
    type Output = EdgeIdx;

    fn add(self, rhs: u32) -> EdgeIdx {
        let EdgeIdx(raw) = self;
        EdgeIdx(raw + rhs)
    }
}

impl AddAssign<u32> for EdgeIdx {
    fn add_assign(&mut self, rhs: u32) {
        *self = *self + rhs;
    }
}

impl Sub<u32> for EdgeIdx {
    type Output = EdgeIdx;

    fn sub(self, rhs: u32) -> EdgeIdx {
        let EdgeIdx(raw) = self;
        EdgeIdx(raw - rhs)
    }
}

impl SubAssign<u32> for EdgeIdx {
    fn sub_assign(&mut self, rhs: u32) {
        *self = *self - rhs;
    }
}
/// A directed graph stored as two flat arrays: one entry per node pointing at
/// the node's first edge, and one entry per edge holding the edge's target.
///
/// The edges of node `n` occupy `edges[nodes[n]..nodes[n + 1]]`; for the last
/// node the range extends to the end of `edges`.
#[derive(Default)]
pub struct Graph {
    /// A node points to the first of its edges.
    ///
    /// A special case is that if the subsequent node points to the same edge,
    /// the current node has no edges. Likewise, a node pointing one past the
    /// last edge (`edges.len()`) has no edges.
    pub nodes: Vec<EdgeIdx>,

    /// An edge points to a target node.
    ///
    /// The source node is defined implicitly by the graph data structure.
    pub edges: Vec<NodeIdx>,
}

impl Graph {
    /// Creates an empty graph.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an empty graph with room reserved for `nodes` nodes and `edges`
    /// edges.
    pub fn with_capacity(nodes: usize, edges: usize) -> Self {
        Self {
            nodes: Vec::with_capacity(nodes),
            edges: Vec::with_capacity(edges),
        }
    }

    /// Verifies the structural invariants of the graph.
    ///
    /// # Panics
    ///
    /// Panics if
    /// - there are edges but no nodes,
    /// - the node or edge count does not fit below `u32::MAX`,
    /// - the first node pointer is not 0,
    /// - a node pointer lies beyond `edges.len()`,
    /// - node pointers are not in non-decreasing order, or
    /// - an edge targets a node that does not exist.
    pub fn check_consistency(&self) {
        if self.nodes.is_empty() {
            assert!(self.edges.is_empty(), "edges must belong to existing nodes");
            return;
        }

        assert!(self.nodes.len() < u32::MAX as usize, "too many nodes");
        assert!(self.edges.len() < u32::MAX as usize, "too many edges");

        assert_eq!(
            *self.nodes.first().unwrap(),
            EdgeIdx(0),
            "first node pointer must be 0"
        );

        for (ni, node) in self.nodes.iter().copied().enumerate() {
            // `<=`, not `<`: a trailing node without edges legitimately points
            // one past the last edge, which is exactly what `edge_start`
            // reports for the position after the final node.
            assert!(
                node.usize() <= self.edges.len(),
                "node pointers must be in range"
            );

            if let Some(succ) = self.nodes.get(ni + 1) {
                assert!(node <= *succ, "node pointers must be well-ordered");
            }
        }

        for edge in &self.edges {
            assert!(
                edge.usize() < self.nodes.len(),
                "edge pointers must be in range"
            );
        }
    }

    /// Iterates over all node indices in ascending order.
    pub fn nodes(&self) -> impl Iterator<Item = NodeIdx> + '_ {
        (0..self.nodes.len()).map(NodeIdx::new)
    }

    /// Iterates over all edges as `(source, target)` pairs, in storage order.
    pub fn edges(&self) -> impl Iterator<Item = (NodeIdx, NodeIdx)> + '_ {
        Edges::new(self)
    }

    /// Returns the index of the first edge of `node`.
    ///
    /// For a `node` past the last node this returns `edges.len()`, which makes
    /// it usable as the exclusive end of the previous node's edge range.
    pub fn edge_start(&self, node: NodeIdx) -> EdgeIdx {
        self.nodes
            .get(node.usize())
            .copied()
            .unwrap_or_else(|| self.edges.len().into())
    }

    /// Returns the range of positions in `edges` that belong to `node`.
    ///
    /// # Panics
    ///
    /// Panics if `node` is not a valid node index.
    pub fn edge_range(&self, node: NodeIdx) -> Range<usize> {
        let start = self.nodes[node.usize()];
        let end = self.edge_start(node + 1);
        start.usize()..end.usize()
    }

    /// Returns the targets of all edges leaving `node`.
    ///
    /// # Panics
    ///
    /// Panics if `node` is not a valid node index.
    pub fn edges_for(&self, node: NodeIdx) -> &[NodeIdx] {
        &self.edges[self.edge_range(node)]
    }
}
/// Iterator over every edge of a [`Graph`] as a `(source, target)` pair.
struct Edges<'a> {
    graph: &'a Graph,
    /// Node that owns the edge at `cursor` (brought up to date lazily in `next`).
    source: NodeIdx,
    /// Position of the next edge to yield.
    cursor: EdgeIdx,
}

impl<'a> Edges<'a> {
    /// Starts iterating at the first edge of the first node.
    fn new(graph: &'a Graph) -> Self {
        Edges {
            graph,
            source: NodeIdx(0),
            cursor: EdgeIdx(0),
        }
    }
}

impl Iterator for Edges<'_> {
    type Item = (NodeIdx, NodeIdx);

    fn next(&mut self) -> Option<Self::Item> {
        // Running off the end of the edge list finishes the iteration.
        let target = *self.graph.edges.get(self.cursor.usize())?;

        // Advance to the node that owns the current edge. A single `if` would
        // not be sufficient because some nodes may not have any edges, so
        // several nodes may have to be skipped at once.
        while self.graph.edge_start(self.source + 1) <= self.cursor {
            self.source += 1;
        }

        let pair = (self.source, target);
        self.cursor += 1;
        Some(pair)
    }
}

View file

@ -1,11 +1,16 @@
mod algo;
pub mod commands;
mod data;
mod graph;
mod util;
use std::io;
use std::path::PathBuf;
use std::fs::File;
use std::io::{self, BufReader};
use std::path::{Path, PathBuf};
use std::time::Instant;
use clap::Parser;
use data::store;
#[derive(Debug, PartialEq, Eq, Parser)]
pub enum PhilosophyGameCmd {
@ -35,8 +40,18 @@ enum Command {
#[arg(short, long)]
flip: bool,
},
/// Find a path from one article to another.
Path2 {
from: String,
to: String,
/// Flip start and end article.
#[arg(short, long)]
flip: bool,
},
/// Find the longest shortest path starting at an article.
LongestShortestPath { from: String },
LongestShortestPath {
from: String,
},
/// Analyze articles using "Philosophy Game" rules.
PhilosophyGame {
#[command(subcommand)]
@ -49,6 +64,7 @@ enum Command {
/// The page to inspect.
page: String,
},
Test,
}
#[derive(Debug, Parser)]
@ -74,6 +90,13 @@ fn main() -> io::Result<()> {
commands::path::path(&args.datafile, &from, &to)
}
}
Command::Path2 { from, to, flip } => {
if flip {
commands::path2::path(&args.datafile, &to, &from)
} else {
commands::path2::path(&args.datafile, &from, &to)
}
}
Command::LongestShortestPath { from } => {
commands::longest_shortest_path::run(&args.datafile, &from)
}
@ -82,5 +105,21 @@ fn main() -> io::Result<()> {
}
Command::ListPages => commands::list_pages::run(&args.datafile),
Command::ListLinks { page } => commands::list_links::run(&args.datafile, &page),
Command::Test => test(&args.datafile),
}
}
/// Times how long it takes to import the graph from `datafile` and prints the
/// elapsed duration.
///
/// # Errors
///
/// Returns any I/O error raised while opening or reading `datafile`.
fn test(datafile: &Path) -> io::Result<()> {
    let started = Instant::now();

    // To time the adjacency-list import instead, read the file with
    // `store::read_adjacency_list(&mut reader)` at this point.

    println!(">> Import graph");
    let file = File::open(datafile)?;
    let mut reader = BufReader::new(file);
    // The results are only loaded, never inspected. The bindings are named
    // (rather than `_`) so the data stays alive until the function returns and
    // its deallocation is not part of the measured span.
    let (_pages, _links, _graph) = store::read_graph(&mut reader)?;

    let finished = Instant::now();
    println!("{:?}", finished.duration_since(started));

    Ok(())
}