Add longest-path command
This commit is contained in:
parent
3045d6d6c6
commit
b2a8597c6f
5 changed files with 75 additions and 198 deletions
|
|
@ -1,5 +1,6 @@
|
|||
pub mod export;
|
||||
pub mod ingest;
|
||||
pub mod longest_path;
|
||||
pub mod path;
|
||||
pub mod show;
|
||||
pub mod stats;
|
||||
|
|
|
|||
|
|
@ -1,23 +0,0 @@
|
|||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::data::store;
|
||||
|
||||
pub fn run(datafile: &Path) -> io::Result<()> {
|
||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||
let data = store::read_adjacency_list(&mut databuf)?;
|
||||
|
||||
for (page_idx, page) in data.pages() {
|
||||
if page.data.redirect {
|
||||
for link_idx in data.link_range(page_idx) {
|
||||
let target_page = data.page(data.link(link_idx).to);
|
||||
println!("{:?} -> {:?}", page.data.title, target_page.data.title);
|
||||
}
|
||||
} else {
|
||||
println!("{:?}", page.data.title);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
70
brood/src/commands/longest_path.rs
Normal file
70
brood/src/commands/longest_path.rs
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
use std::io;
|
||||
|
||||
use crate::{
|
||||
algo::Dijkstra,
|
||||
data::Data,
|
||||
graph::NodeIdx,
|
||||
util::{self, TitleNormalizer},
|
||||
};
|
||||
|
||||
/// Find the article with the longest shortest path away from the starting
|
||||
/// article.
|
||||
#[derive(Debug, clap::Parser)]
|
||||
pub struct Cmd {
|
||||
start: String,
|
||||
#[arg(long, short, default_value_t = 1)]
|
||||
top: usize,
|
||||
}
|
||||
|
||||
fn print_path(data: &Data, start: NodeIdx, goal: NodeIdx, path: Option<(u32, Vec<NodeIdx>)>) {
|
||||
let start = &data.pages[start.usize()].title;
|
||||
let goal = &data.pages[goal.usize()].title;
|
||||
|
||||
let Some((cost, path)) = path else {
|
||||
println!("No path found from {start} to {goal}");
|
||||
return;
|
||||
};
|
||||
|
||||
println!("Path found (cost {cost}, length {}):", path.len());
|
||||
|
||||
for page in path {
|
||||
println!("{}", util::fmt_page(&data.pages[page.usize()]));
|
||||
}
|
||||
}
|
||||
|
||||
impl Cmd {
|
||||
pub fn run(self, data: Data) -> io::Result<()> {
|
||||
let normalizer = TitleNormalizer::new();
|
||||
|
||||
println!(">> Resolve article");
|
||||
let start = util::resolve_title(&normalizer, &data, &self.start);
|
||||
println!("Start: {}", data.pages[start.usize()].title);
|
||||
|
||||
println!(">> Search paths");
|
||||
println!("> Preparing dijkstra");
|
||||
let mut dijkstra = Dijkstra::new(&data.graph);
|
||||
println!("> Running dijkstra");
|
||||
dijkstra.run(
|
||||
start,
|
||||
|_| false,
|
||||
|source, _edge, _target| !data.pages[source.usize()].redirect as u32,
|
||||
);
|
||||
|
||||
println!(">> Find longest paths");
|
||||
let mut costs = data
|
||||
.graph
|
||||
.nodes()
|
||||
.map(|n| (dijkstra.cost(n), n))
|
||||
.filter(|(c, _)| *c < u32::MAX) // Only reachable nodes please
|
||||
.collect::<Vec<_>>();
|
||||
costs.sort_unstable();
|
||||
|
||||
for (cost, goal) in costs.iter().rev().take(self.top) {
|
||||
let path = dijkstra.path(*goal);
|
||||
println!();
|
||||
print_path(&data, start, *goal, Some((*cost, path)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -1,173 +0,0 @@
|
|||
use std::collections::BinaryHeap;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::data::adjacency_list::AdjacencyList;
|
||||
use crate::data::info::{LinkInfo, PageInfo};
|
||||
use crate::data::store;
|
||||
use crate::util;
|
||||
|
||||
struct DijkstraPageInfo {
|
||||
cost: u32,
|
||||
/// Index of the previous page.
|
||||
prev: u32,
|
||||
redirect: bool,
|
||||
}
|
||||
|
||||
impl DijkstraPageInfo {
|
||||
fn from_page_info(info: PageInfo) -> Self {
|
||||
Self {
|
||||
cost: u32::MAX,
|
||||
prev: u32::MAX,
|
||||
redirect: info.redirect,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct DijkstraLinkInfo {
|
||||
cost: u32,
|
||||
}
|
||||
|
||||
impl DijkstraLinkInfo {
|
||||
fn from_link_info(info: LinkInfo) -> Self {
|
||||
Self {
|
||||
cost: 1,
|
||||
// cost: 1000 + info.start,
|
||||
// cost: 10000 + info.start,
|
||||
// cost: 1000 + info.start / 10,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Priority queue entry: a page together with the cost it was queued at.
#[derive(Clone, Copy, PartialEq, Eq)]
struct Entry {
    cost: u32,
    page_idx: u32,
}

impl Entry {
    /// Create an entry for `page_idx` queued at `cost`.
    pub fn new(cost: u32, page_idx: u32) -> Self {
        Entry { cost, page_idx }
    }
}

// The ordering is written by hand so that a `BinaryHeap` of entries pops the
// lowest cost first (min-heap) instead of the highest (max-heap).
impl Ord for Entry {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        // Costs compare in reverse; equal costs fall back to the page index.
        match self.cost.cmp(&other.cost).reverse() {
            Ordering::Equal => self.page_idx.cmp(&other.page_idx),
            unequal => unequal,
        }
    }
}

impl PartialOrd for Entry {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
|
||||
|
||||
/// Closely matches the dijkstra example in [std::collections::binary_heap].
|
||||
fn full_dijkstra(
|
||||
data: AdjacencyList<PageInfo, LinkInfo>,
|
||||
from: u32,
|
||||
) -> AdjacencyList<DijkstraPageInfo, DijkstraLinkInfo> {
|
||||
println!("> Prepare state");
|
||||
let mut data = data
|
||||
.change_page_data(DijkstraPageInfo::from_page_info)
|
||||
.change_link_data(DijkstraLinkInfo::from_link_info);
|
||||
let mut queue = BinaryHeap::new();
|
||||
data.page_mut(from).data.cost = 0;
|
||||
queue.push(Entry::new(0, from));
|
||||
|
||||
println!("> Run dijkstra");
|
||||
while let Some(Entry { cost, page_idx }) = queue.pop() {
|
||||
let page = data.page(page_idx);
|
||||
if cost > page.data.cost {
|
||||
// This queue entry is outdated
|
||||
continue;
|
||||
}
|
||||
|
||||
let redirect = page.data.redirect;
|
||||
for link_idx in data.link_range(page_idx) {
|
||||
let link = data.link(link_idx);
|
||||
|
||||
let next = Entry {
|
||||
cost: cost + if redirect { 0 } else { link.data.cost },
|
||||
page_idx: link.to,
|
||||
};
|
||||
|
||||
let target_page = data.page_mut(link.to);
|
||||
if next.cost < target_page.data.cost {
|
||||
target_page.data.cost = next.cost;
|
||||
target_page.data.prev = page_idx;
|
||||
queue.push(next);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
data
|
||||
}
|
||||
|
||||
fn find_longest_shortest_path(
|
||||
data: AdjacencyList<DijkstraPageInfo, DijkstraLinkInfo>,
|
||||
from: u32,
|
||||
) -> Option<Vec<u32>> {
|
||||
let to = data
|
||||
.pages
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, p)| p.data.cost != u32::MAX)
|
||||
.max_by_key(|(_, p)| p.data.cost)?
|
||||
.0 as u32;
|
||||
|
||||
let mut steps = vec![];
|
||||
let mut at = to;
|
||||
loop {
|
||||
steps.push(at);
|
||||
at = data.page(at).data.prev;
|
||||
if at == u32::MAX {
|
||||
break;
|
||||
};
|
||||
}
|
||||
steps.reverse();
|
||||
if steps.first() == Some(&from) {
|
||||
Some(steps)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(datafile: &Path, from: &str) -> io::Result<()> {
|
||||
println!(">> Import");
|
||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||
let data = store::read_adjacency_list(&mut databuf)?;
|
||||
let pages = data.pages.clone();
|
||||
|
||||
println!(">> Locate from and to");
|
||||
let from_idx = util::resolve_redirects(&data, util::find_index_of_title(&pages, from));
|
||||
println!("From: {:?}", data.page(from_idx).data.title);
|
||||
|
||||
println!(">> Find all shortest paths");
|
||||
let data = full_dijkstra(data, from_idx);
|
||||
|
||||
println!(">> Find longest shortest path");
|
||||
let path = find_longest_shortest_path(data, from_idx);
|
||||
|
||||
if let Some(path) = path {
|
||||
println!("Path found:");
|
||||
for page_idx in path {
|
||||
let page = &pages[page_idx as usize];
|
||||
if page.data.redirect {
|
||||
println!(" v {:?}", page.data.title);
|
||||
} else {
|
||||
println!(" - {:?}", page.data.title);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("No path found");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -14,8 +14,9 @@ enum Command {
|
|||
Ingest(commands::ingest::Cmd),
|
||||
Export(commands::export::Cmd),
|
||||
Show(commands::show::Cmd),
|
||||
Path(commands::path::Cmd),
|
||||
Stats(commands::stats::Cmd),
|
||||
Path(commands::path::Cmd),
|
||||
LongestPath(commands::longest_path::Cmd),
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
|
|
@ -73,7 +74,8 @@ fn main() -> io::Result<()> {
|
|||
Command::Ingest(_) => unreachable!(),
|
||||
Command::Export(cmd) => cmd.run(data),
|
||||
Command::Show(cmd) => cmd.run(data),
|
||||
Command::Path(cmd) => cmd.run(data),
|
||||
Command::Stats(cmd) => cmd.run(data),
|
||||
Command::Path(cmd) => cmd.run(data),
|
||||
Command::LongestPath(cmd) => cmd.run(data),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue