Implement new graph data structure and dijkstra
This commit is contained in:
parent
778cb6748d
commit
18e54c4ce1
6 changed files with 516 additions and 3 deletions
77
brood/src/algo.rs
Normal file
77
brood/src/algo.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
use std::{cmp::Reverse, collections::BinaryHeap};
|
||||
|
||||
use crate::graph::{EdgeIdx, Graph, NodeIdx};
|
||||
|
||||
pub struct Dijkstra<'a> {
|
||||
graph: &'a Graph,
|
||||
cost: Vec<u32>,
|
||||
pred: Vec<NodeIdx>,
|
||||
}
|
||||
|
||||
impl<'a> Dijkstra<'a> {
|
||||
pub fn new(graph: &'a Graph) -> Self {
|
||||
Self {
|
||||
graph,
|
||||
cost: vec![u32::MAX; graph.nodes.len()],
|
||||
pred: vec![NodeIdx::NONE; graph.nodes.len()],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(
|
||||
&mut self,
|
||||
start: NodeIdx,
|
||||
goal: impl Fn(NodeIdx) -> bool,
|
||||
cost: impl Fn(NodeIdx, EdgeIdx, NodeIdx) -> u32,
|
||||
) {
|
||||
self.cost[start.usize()] = 0;
|
||||
let mut queue = BinaryHeap::new();
|
||||
queue.push((Reverse(0), start));
|
||||
|
||||
while let Some((Reverse(curr_cost), curr)) = queue.pop() {
|
||||
if goal(curr) {
|
||||
break; // We've found the shortest path to our target
|
||||
}
|
||||
|
||||
// These seem to never actually occur
|
||||
// if curr_cost > self.cost[curr.usize()] {
|
||||
// continue; // Outdated entry
|
||||
// }
|
||||
|
||||
for edge in self.graph.edge_range(curr).map(EdgeIdx::new) {
|
||||
let next = self.graph.edges[edge.usize()];
|
||||
let next_cost = curr_cost + cost(curr, edge, next);
|
||||
if next_cost < self.cost[next.usize()] {
|
||||
self.cost[next.usize()] = next_cost;
|
||||
self.pred[next.usize()] = curr;
|
||||
queue.push((Reverse(next_cost), next));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn cost(&self, node: NodeIdx) -> u32 {
|
||||
self.cost[node.usize()]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn pred(&self, node: NodeIdx) -> NodeIdx {
|
||||
self.pred[node.usize()]
|
||||
}
|
||||
|
||||
pub fn path(&self, goal: NodeIdx) -> Vec<NodeIdx> {
|
||||
let mut path = vec![];
|
||||
let mut at = goal;
|
||||
|
||||
loop {
|
||||
path.push(at);
|
||||
at = self.pred(at);
|
||||
if at == NodeIdx::NONE {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
path.reverse();
|
||||
path
|
||||
}
|
||||
}
|
||||
|
|
@ -3,5 +3,6 @@ pub mod list_links;
|
|||
pub mod list_pages;
|
||||
pub mod longest_shortest_path;
|
||||
pub mod path;
|
||||
pub mod path2;
|
||||
pub mod philosophy_game;
|
||||
pub mod reexport;
|
||||
|
|
|
|||
77
brood/src/commands/path2.rs
Normal file
77
brood/src/commands/path2.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
use std::{
|
||||
fs::File,
|
||||
io::{self, BufReader},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
algo::Dijkstra,
|
||||
data::{info::PageInfo, store},
|
||||
graph::{Graph, NodeIdx},
|
||||
util,
|
||||
};
|
||||
|
||||
pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx {
|
||||
let title = util::normalize_link(title);
|
||||
pages
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, p)| util::normalize_link(&p.title) == title)
|
||||
.map(|(i, _)| NodeIdx::new(i))
|
||||
.expect("invalid title")
|
||||
}
|
||||
|
||||
/// Follows redirect pages starting at `page` and returns the page the chain
/// ends on.
///
/// A page is followed when it is flagged as a redirect and has at least one
/// outgoing edge; its first edge is taken as the redirect target. A redirect
/// page without edges is returned as-is.
///
/// Redirects that form a cycle would otherwise never terminate, so the number
/// of pages inspected is capped at `pages.len()`. When the cap is hit, the
/// page reached at that point (a member of the cycle) is returned.
pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
    // A chain that never revisits a page inspects at most `pages.len()` pages,
    // so exhausting this budget can only mean the chain loops.
    for _ in 0..pages.len() {
        if !pages[page.usize()].redirect {
            return page;
        }

        match graph.edges_for(page).first() {
            Some(&next) => page = next,
            None => return page,
        }
    }

    page
}
|
||||
|
||||
pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
|
||||
println!(">> Import");
|
||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||
let (pages, _links, graph) = store::read_graph(&mut databuf)?;
|
||||
|
||||
println!(">> Locate from and to");
|
||||
let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start));
|
||||
let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal));
|
||||
println!("Start: {:?}", pages[start.usize()].title);
|
||||
println!("Goal: {:?}", pages[goal.usize()].title);
|
||||
|
||||
println!(">> Find path");
|
||||
println!("> Preparing dijkstra");
|
||||
let mut dijkstra = Dijkstra::new(&graph);
|
||||
println!("> Running dijkstra");
|
||||
dijkstra.run(
|
||||
start,
|
||||
|node| node == goal,
|
||||
|source, _edge, _target| !pages[source.usize()].redirect as u32,
|
||||
);
|
||||
|
||||
if dijkstra.cost(goal) == u32::MAX {
|
||||
println!("No path found");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("> Collecting path");
|
||||
let path = dijkstra.path(goal);
|
||||
let cost = dijkstra.cost(goal);
|
||||
println!("Path found (cost {cost}, length {}):", path.len());
|
||||
for page in path {
|
||||
let info = &pages[page.usize()];
|
||||
if info.redirect {
|
||||
println!(" v {:?}", info.title);
|
||||
} else {
|
||||
println!(" - {:?}", info.title);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
use std::io::{self, Read, Write};
|
||||
|
||||
use crate::graph::{EdgeIdx, Graph, NodeIdx};
|
||||
|
||||
use super::{
|
||||
adjacency_list::{AdjacencyList, Link, Page},
|
||||
info::{LinkInfo, PageInfo},
|
||||
|
|
@ -132,3 +134,27 @@ pub fn read_adjacency_list<R: Read>(from: &mut R) -> io::Result<AdjacencyList<Pa
|
|||
|
||||
Ok(AdjacencyList { pages, links })
|
||||
}
|
||||
|
||||
pub fn read_graph(from: &mut impl Read) -> io::Result<(Vec<PageInfo>, Vec<LinkInfo>, Graph)> {
|
||||
let n_pages = read_u32(from)?;
|
||||
let n_links = read_u32(from)?;
|
||||
|
||||
let mut pages = Vec::with_capacity(n_pages as usize);
|
||||
let mut links = Vec::with_capacity(n_links as usize);
|
||||
let mut graph = Graph::with_capacity(n_pages as usize, n_links as usize);
|
||||
|
||||
for _ in 0..n_pages {
|
||||
let page = read_page(from)?;
|
||||
graph.nodes.push(EdgeIdx(page.start));
|
||||
pages.push(page.data);
|
||||
}
|
||||
|
||||
for _ in 0..n_links {
|
||||
let link = read_link(from)?;
|
||||
graph.edges.push(NodeIdx(link.to));
|
||||
links.push(link.data);
|
||||
}
|
||||
|
||||
graph.check_consistency();
|
||||
Ok((pages, links, graph))
|
||||
}
|
||||
|
|
|
|||
293
brood/src/graph.rs
Normal file
293
brood/src/graph.rs
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
use std::ops::{Add, AddAssign, Range, Sub, SubAssign};
|
||||
|
||||
/// Index of a node, stored as a `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct NodeIdx(pub u32);

impl NodeIdx {
    /// Sentinel index that stands for "no node".
    pub const NONE: Self = Self(u32::MAX);

    /// Builds an index from a `usize`.
    ///
    /// The value is converted with an `as` cast, so anything that does not
    /// fit into a `u32` is truncated.
    #[inline]
    pub const fn new(value: usize) -> Self {
        Self(value as u32)
    }

    /// Returns the index as a `usize`, ready for slice indexing.
    #[inline]
    pub const fn usize(self) -> usize {
        self.0 as usize
    }
}

impl From<u32> for NodeIdx {
    fn from(raw: u32) -> Self {
        NodeIdx(raw)
    }
}

impl From<usize> for NodeIdx {
    fn from(index: usize) -> Self {
        NodeIdx::new(index)
    }
}

// Arithmetic between two indices.

impl Add for NodeIdx {
    type Output = Self;

    fn add(self, other: Self) -> Self {
        NodeIdx(self.0 + other.0)
    }
}

impl AddAssign for NodeIdx {
    fn add_assign(&mut self, other: Self) {
        *self = *self + other;
    }
}

impl Sub for NodeIdx {
    type Output = Self;

    fn sub(self, other: Self) -> Self {
        NodeIdx(self.0 - other.0)
    }
}

impl SubAssign for NodeIdx {
    fn sub_assign(&mut self, other: Self) {
        *self = *self - other;
    }
}

// Arithmetic between an index and a plain `u32` offset.

impl Add<u32> for NodeIdx {
    type Output = Self;

    fn add(self, offset: u32) -> Self {
        NodeIdx(self.0 + offset)
    }
}

impl AddAssign<u32> for NodeIdx {
    fn add_assign(&mut self, offset: u32) {
        *self = *self + offset;
    }
}

impl Sub<u32> for NodeIdx {
    type Output = Self;

    fn sub(self, offset: u32) -> Self {
        NodeIdx(self.0 - offset)
    }
}

impl SubAssign<u32> for NodeIdx {
    fn sub_assign(&mut self, offset: u32) {
        *self = *self - offset;
    }
}
|
||||
|
||||
/// Index of an edge, stored as a `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct EdgeIdx(pub u32);

impl EdgeIdx {
    /// Sentinel index that stands for "no edge".
    pub const NONE: Self = Self(u32::MAX);

    /// Builds an index from a `usize`.
    ///
    /// The value is converted with an `as` cast, so anything that does not
    /// fit into a `u32` is truncated.
    #[inline]
    pub const fn new(value: usize) -> Self {
        Self(value as u32)
    }

    /// Returns the index as a `usize`, ready for slice indexing.
    #[inline]
    pub const fn usize(self) -> usize {
        self.0 as usize
    }
}

impl From<u32> for EdgeIdx {
    fn from(raw: u32) -> Self {
        EdgeIdx(raw)
    }
}

impl From<usize> for EdgeIdx {
    fn from(index: usize) -> Self {
        EdgeIdx::new(index)
    }
}

// Arithmetic between two indices.

impl Add for EdgeIdx {
    type Output = Self;

    fn add(self, other: Self) -> Self {
        EdgeIdx(self.0 + other.0)
    }
}

impl AddAssign for EdgeIdx {
    fn add_assign(&mut self, other: Self) {
        *self = *self + other;
    }
}

impl Sub for EdgeIdx {
    type Output = Self;

    fn sub(self, other: Self) -> Self {
        EdgeIdx(self.0 - other.0)
    }
}

impl SubAssign for EdgeIdx {
    fn sub_assign(&mut self, other: Self) {
        *self = *self - other;
    }
}

// Arithmetic between an index and a plain `u32` offset.

impl Add<u32> for EdgeIdx {
    type Output = Self;

    fn add(self, offset: u32) -> Self {
        EdgeIdx(self.0 + offset)
    }
}

impl AddAssign<u32> for EdgeIdx {
    fn add_assign(&mut self, offset: u32) {
        *self = *self + offset;
    }
}

impl Sub<u32> for EdgeIdx {
    type Output = Self;

    fn sub(self, offset: u32) -> Self {
        EdgeIdx(self.0 - offset)
    }
}

impl SubAssign<u32> for EdgeIdx {
    fn sub_assign(&mut self, offset: u32) {
        *self = *self - offset;
    }
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Graph {
|
||||
/// A node points to the first of its edges.
|
||||
///
|
||||
/// A special case is that if the subsequent node points to the same edge,
|
||||
/// the current node has no edges.
|
||||
pub nodes: Vec<EdgeIdx>,
|
||||
|
||||
/// An edge points to a target node.
|
||||
///
|
||||
/// The source node is defined implicitly by the graph data structure.
|
||||
pub edges: Vec<NodeIdx>,
|
||||
}
|
||||
|
||||
impl Graph {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn with_capacity(nodes: usize, edges: usize) -> Self {
|
||||
Self {
|
||||
nodes: Vec::with_capacity(nodes),
|
||||
edges: Vec::with_capacity(edges),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_consistency(&self) {
|
||||
if self.nodes.is_empty() {
|
||||
assert!(self.edges.is_empty(), "edges must belong to existing nodes");
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(self.nodes.len() < u32::MAX as usize, "too many nodes");
|
||||
assert!(self.edges.len() < u32::MAX as usize, "too many edges");
|
||||
|
||||
assert_eq!(
|
||||
*self.nodes.first().unwrap(),
|
||||
EdgeIdx(0),
|
||||
"first node pointer must be 0"
|
||||
);
|
||||
|
||||
for (ni, node) in self.nodes.iter().cloned().enumerate() {
|
||||
assert!(
|
||||
node.usize() < self.edges.len(),
|
||||
"node pointers must in range"
|
||||
);
|
||||
|
||||
if let Some(succ) = self.nodes.get(ni + 1) {
|
||||
assert!(node <= *succ, "node pointers must be well-ordered");
|
||||
}
|
||||
}
|
||||
|
||||
for edge in &self.edges {
|
||||
assert!(
|
||||
edge.usize() < self.nodes.len(),
|
||||
"edge pointers must be in range"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn nodes(&self) -> impl Iterator<Item = NodeIdx> + '_ {
|
||||
(0..self.nodes.len()).map(NodeIdx::new)
|
||||
}
|
||||
|
||||
pub fn edges(&self) -> impl Iterator<Item = (NodeIdx, NodeIdx)> + '_ {
|
||||
Edges::new(self)
|
||||
}
|
||||
|
||||
pub fn edge_start(&self, node: NodeIdx) -> EdgeIdx {
|
||||
self.nodes
|
||||
.get(node.usize())
|
||||
.copied()
|
||||
.unwrap_or_else(|| self.edges.len().into())
|
||||
}
|
||||
|
||||
pub fn edge_range(&self, node: NodeIdx) -> Range<usize> {
|
||||
let start = self.nodes[node.usize()];
|
||||
let end = self.edge_start(node + 1);
|
||||
start.usize()..end.usize()
|
||||
}
|
||||
|
||||
pub fn edges_for(&self, node: NodeIdx) -> &[NodeIdx] {
|
||||
&self.edges[self.edge_range(node)]
|
||||
}
|
||||
}
|
||||
|
||||
struct Edges<'a> {
|
||||
graph: &'a Graph,
|
||||
ni: NodeIdx,
|
||||
ei: EdgeIdx,
|
||||
}
|
||||
|
||||
impl<'a> Edges<'a> {
|
||||
fn new(graph: &'a Graph) -> Self {
|
||||
Self {
|
||||
graph,
|
||||
ni: NodeIdx(0),
|
||||
ei: EdgeIdx(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Edges<'_> {
|
||||
type Item = (NodeIdx, NodeIdx);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.ei.usize() >= self.graph.edges.len() {
|
||||
return None;
|
||||
}
|
||||
let to = self.graph.edges[self.ei.usize()];
|
||||
|
||||
// if would not be sufficient because some nodes may not have any edges.
|
||||
while self.ei >= self.graph.edge_start(self.ni + 1) {
|
||||
self.ni += 1;
|
||||
}
|
||||
let from = self.ni;
|
||||
|
||||
self.ei += 1;
|
||||
Some((from, to))
|
||||
}
|
||||
}
|
||||
|
|
@ -1,11 +1,16 @@
|
|||
mod algo;
|
||||
pub mod commands;
|
||||
mod data;
|
||||
mod graph;
|
||||
mod util;
|
||||
|
||||
use std::io;
|
||||
use std::path::PathBuf;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Instant;
|
||||
|
||||
use clap::Parser;
|
||||
use data::store;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Parser)]
|
||||
pub enum PhilosophyGameCmd {
|
||||
|
|
@ -35,8 +40,18 @@ enum Command {
|
|||
#[arg(short, long)]
|
||||
flip: bool,
|
||||
},
|
||||
/// Find a path from one article to another.
|
||||
Path2 {
|
||||
from: String,
|
||||
to: String,
|
||||
/// Flip start and end article.
|
||||
#[arg(short, long)]
|
||||
flip: bool,
|
||||
},
|
||||
/// Find the longest shortest path starting at an article.
|
||||
LongestShortestPath { from: String },
|
||||
LongestShortestPath {
|
||||
from: String,
|
||||
},
|
||||
/// Analyze articles using "Philosophy Game" rules.
|
||||
PhilosophyGame {
|
||||
#[command(subcommand)]
|
||||
|
|
@ -49,6 +64,7 @@ enum Command {
|
|||
/// The page to inspect.
|
||||
page: String,
|
||||
},
|
||||
Test,
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
|
|
@ -74,6 +90,13 @@ fn main() -> io::Result<()> {
|
|||
commands::path::path(&args.datafile, &from, &to)
|
||||
}
|
||||
}
|
||||
Command::Path2 { from, to, flip } => {
|
||||
if flip {
|
||||
commands::path2::path(&args.datafile, &to, &from)
|
||||
} else {
|
||||
commands::path2::path(&args.datafile, &from, &to)
|
||||
}
|
||||
}
|
||||
Command::LongestShortestPath { from } => {
|
||||
commands::longest_shortest_path::run(&args.datafile, &from)
|
||||
}
|
||||
|
|
@ -82,5 +105,21 @@ fn main() -> io::Result<()> {
|
|||
}
|
||||
Command::ListPages => commands::list_pages::run(&args.datafile),
|
||||
Command::ListLinks { page } => commands::list_links::run(&args.datafile, &page),
|
||||
Command::Test => test(&args.datafile),
|
||||
}
|
||||
}
|
||||
|
||||
fn test(datafile: &Path) -> io::Result<()> {
|
||||
let a = Instant::now();
|
||||
// println!(">> Import adjacency list");
|
||||
// let mut databuf = BufReader::new(File::open(datafile)?);
|
||||
// let adjlist = store::read_adjacency_list(&mut databuf)?;
|
||||
println!(">> Import graph");
|
||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||
let (pages, links, graph) = store::read_graph(&mut databuf)?;
|
||||
let b = Instant::now();
|
||||
|
||||
println!("{:?}", b.duration_since(a));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue