Port path command
This commit is contained in:
parent
abd6b3519c
commit
4e41084f2a
4 changed files with 77 additions and 78 deletions
|
|
@ -1 +1,2 @@
|
|||
pub mod ingest;
|
||||
pub mod path;
|
||||
|
|
|
|||
|
|
@ -1,77 +1,61 @@
|
|||
use std::{
|
||||
fs::File,
|
||||
io::{self, BufReader},
|
||||
path::Path,
|
||||
};
|
||||
use std::{io, path::Path};
|
||||
|
||||
use crate::{
|
||||
algo::Dijkstra,
|
||||
data::{info::PageInfo, store},
|
||||
graph::{Graph, NodeIdx},
|
||||
util,
|
||||
data,
|
||||
util::{self, TitleNormalizer},
|
||||
};
|
||||
|
||||
pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx {
|
||||
let title = util::normalize_link(title);
|
||||
pages
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, p)| util::normalize_link(&p.title) == title)
|
||||
.map(|(i, _)| NodeIdx::new(i))
|
||||
.expect("invalid title")
|
||||
/// Find the shortest path between two articles.
|
||||
// NOTE(review): clap's derive uses `///` doc comments as help text, so the
// explanatory notes on this struct are plain `//` comments to leave the CLI
// output untouched.
#[derive(Debug, clap::Parser)]
|
||||
pub struct Cmd {
|
||||
// Title of the article the search starts from; `run` hands it to
// `util::resolve_title` to obtain the start node.
start: String,
|
||||
// Title of the article the search should reach; `run` resolves it the same
// way to obtain the goal node.
goal: String,
|
||||
}
|
||||
|
||||
pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
|
||||
loop {
|
||||
if pages[page.usize()].redirect {
|
||||
if let Some(next) = graph.edges_for(page).first() {
|
||||
page = *next;
|
||||
continue;
|
||||
impl Cmd {
|
||||
pub fn run(self, data: &Path) -> io::Result<()> {
|
||||
let normalizer = TitleNormalizer::new();
|
||||
|
||||
println!(">> Import");
|
||||
let (pages, _links, graph) = data::read_from_file(data)?;
|
||||
|
||||
println!(">> Resolve articles");
|
||||
let start = util::resolve_title(&normalizer, &pages, &graph, &self.start);
|
||||
let goal = util::resolve_title(&normalizer, &pages, &graph, &self.goal);
|
||||
println!("Start: {}", pages[start.usize()].title);
|
||||
println!("Goal: {}", pages[goal.usize()].title);
|
||||
|
||||
println!(">> Find path");
|
||||
println!("> Preparing dijkstra");
|
||||
let mut dijkstra = Dijkstra::new(&graph);
|
||||
println!("> Running dijkstra");
|
||||
dijkstra.run(
|
||||
start,
|
||||
|node| node == goal,
|
||||
|source, _edge, _target| !pages[source.usize()].redirect as u32,
|
||||
);
|
||||
|
||||
if dijkstra.cost(goal) == u32::MAX {
|
||||
println!("No path found");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("> Collecting path");
|
||||
let path = dijkstra.path(goal);
|
||||
let cost = dijkstra.cost(goal);
|
||||
|
||||
println!();
|
||||
println!("Path found (cost {cost}, length {}):", path.len());
|
||||
for page in path {
|
||||
let info = &pages[page.usize()];
|
||||
if info.redirect {
|
||||
println!("v {:?}", info.title);
|
||||
} else {
|
||||
println!("- {:?}", info.title);
|
||||
}
|
||||
}
|
||||
|
||||
return page;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
|
||||
println!(">> Import");
|
||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||
let (pages, _links, graph) = store::read_graph(&mut databuf)?;
|
||||
|
||||
println!(">> Locate from and to");
|
||||
let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start));
|
||||
let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal));
|
||||
println!("Start: {:?}", pages[start.usize()].title);
|
||||
println!("Goal: {:?}", pages[goal.usize()].title);
|
||||
|
||||
println!(">> Find path");
|
||||
println!("> Preparing dijkstra");
|
||||
let mut dijkstra = Dijkstra::new(&graph);
|
||||
println!("> Running dijkstra");
|
||||
dijkstra.run(
|
||||
start,
|
||||
|node| node == goal,
|
||||
|source, _edge, _target| !pages[source.usize()].redirect as u32,
|
||||
);
|
||||
|
||||
if dijkstra.cost(goal) == u32::MAX {
|
||||
println!("No path found");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("> Collecting path");
|
||||
let path = dijkstra.path(goal);
|
||||
let cost = dijkstra.cost(goal);
|
||||
println!("Path found (cost {cost}, length {}):", path.len());
|
||||
for page in path {
|
||||
let info = &pages[page.usize()];
|
||||
if info.redirect {
|
||||
println!(" v {:?}", info.title);
|
||||
} else {
|
||||
println!(" - {:?}", info.title);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ use clap::Parser;
|
|||
// Subcommands of the binary. Each variant carries the argument struct (`Cmd`)
// of the module with the matching name under `commands`; `main` matches on the
// variant and calls that struct's `run` with the data file path.
// NOTE(review): kept as `//` rather than `///` because clap's derive would
// turn doc comments into help text.
#[derive(Debug, Parser)]
|
||||
enum Command {
|
||||
Ingest(commands::ingest::Cmd),
|
||||
Path(commands::path::Cmd),
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
|
|
@ -24,5 +25,6 @@ fn main() -> io::Result<()> {
|
|||
let args = Args::parse();
|
||||
match args.command {
|
||||
Command::Ingest(cmd) => cmd.run(&args.datafile),
|
||||
Command::Path(cmd) => cmd.run(&args.datafile),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,13 @@
|
|||
use std::{fmt, iter, time::Instant};
|
||||
use std::{fmt, iter, thread::panicking, time::Instant};
|
||||
|
||||
use regex::Regex;
|
||||
use thousands::Separable;
|
||||
|
||||
use crate::{
|
||||
data::Page,
|
||||
graph::{Graph, NodeIdx},
|
||||
};
|
||||
|
||||
pub struct Counter {
|
||||
n: usize,
|
||||
last_print: Instant,
|
||||
|
|
@ -145,27 +150,34 @@ impl TitleNormalizer {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
pub fn find_index_of_title(pages: &[Page<PageInfo>], title: &str) -> u32 {
|
||||
let title = normalize_link(title);
|
||||
pub fn locate_title(normalizer: &TitleNormalizer, pages: &[Page], title: &str) -> NodeIdx {
|
||||
let normalized = normalizer.normalize(title);
|
||||
pages
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, p)| normalize_link(&p.data.title) == title)
|
||||
.map(|(i, _)| i)
|
||||
.expect("invalid title") as u32
|
||||
.find(|(_, p)| normalizer.normalize(&p.title) == normalized)
|
||||
.map(|(i, _)| NodeIdx::new(i))
|
||||
.expect("invalid title")
|
||||
}
|
||||
|
||||
pub fn resolve_redirects(data: &AdjacencyList<PageInfo, LinkInfo>, mut page_idx: u32) -> u32 {
|
||||
pub fn resolve_redirects(pages: &[Page], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
|
||||
loop {
|
||||
if data.page(page_idx).data.redirect {
|
||||
if let Some(link_idx) = data.link_redirect(page_idx) {
|
||||
page_idx = data.link(link_idx).to;
|
||||
if pages[page.usize()].redirect {
|
||||
if let Some(target) = graph.edges_for(page).first() {
|
||||
page = *target;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return page_idx;
|
||||
return page;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/// Resolves an article title to the node it ultimately refers to.
///
/// Looks `title` up with `locate_title` (using `normalizer` and `pages`) and
/// passes the resulting index through `resolve_redirects` together with
/// `pages` and `graph`, returning whatever node that call yields.
///
/// # Panics
///
/// NOTE(review): `locate_title` as shown in this change ends in
/// `.expect("invalid title")`, so a title that matches no page presumably
/// panics here as well — confirm against the final file.
pub fn resolve_title(
|
||||
normalizer: &TitleNormalizer,
|
||||
pages: &[Page],
|
||||
|
||||
graph: &Graph,
|
||||
title: &str,
|
||||
) -> NodeIdx {
|
||||
resolve_redirects(pages, graph, locate_title(normalizer, pages, title))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue