Port path command
This commit is contained in:
parent
abd6b3519c
commit
4e41084f2a
4 changed files with 77 additions and 78 deletions
|
|
@ -1 +1,2 @@
|
||||||
pub mod ingest;
|
pub mod ingest;
|
||||||
|
pub mod path;
|
||||||
|
|
|
||||||
|
|
@ -1,49 +1,30 @@
|
||||||
use std::{
|
use std::{io, path::Path};
|
||||||
fs::File,
|
|
||||||
io::{self, BufReader},
|
|
||||||
path::Path,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
algo::Dijkstra,
|
algo::Dijkstra,
|
||||||
data::{info::PageInfo, store},
|
data,
|
||||||
graph::{Graph, NodeIdx},
|
util::{self, TitleNormalizer},
|
||||||
util,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx {
|
/// Find the shortest path between two articles.
|
||||||
let title = util::normalize_link(title);
|
#[derive(Debug, clap::Parser)]
|
||||||
pages
|
pub struct Cmd {
|
||||||
.iter()
|
start: String,
|
||||||
.enumerate()
|
goal: String,
|
||||||
.find(|(_, p)| util::normalize_link(&p.title) == title)
|
|
||||||
.map(|(i, _)| NodeIdx::new(i))
|
|
||||||
.expect("invalid title")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
|
impl Cmd {
|
||||||
loop {
|
pub fn run(self, data: &Path) -> io::Result<()> {
|
||||||
if pages[page.usize()].redirect {
|
let normalizer = TitleNormalizer::new();
|
||||||
if let Some(next) = graph.edges_for(page).first() {
|
|
||||||
page = *next;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return page;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
|
|
||||||
println!(">> Import");
|
println!(">> Import");
|
||||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
let (pages, _links, graph) = data::read_from_file(data)?;
|
||||||
let (pages, _links, graph) = store::read_graph(&mut databuf)?;
|
|
||||||
|
|
||||||
println!(">> Locate from and to");
|
println!(">> Resolve articles");
|
||||||
let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start));
|
let start = util::resolve_title(&normalizer, &pages, &graph, &self.start);
|
||||||
let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal));
|
let goal = util::resolve_title(&normalizer, &pages, &graph, &self.goal);
|
||||||
println!("Start: {:?}", pages[start.usize()].title);
|
println!("Start: {}", pages[start.usize()].title);
|
||||||
println!("Goal: {:?}", pages[goal.usize()].title);
|
println!("Goal: {}", pages[goal.usize()].title);
|
||||||
|
|
||||||
println!(">> Find path");
|
println!(">> Find path");
|
||||||
println!("> Preparing dijkstra");
|
println!("> Preparing dijkstra");
|
||||||
|
|
@ -63,6 +44,8 @@ pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
|
||||||
println!("> Collecting path");
|
println!("> Collecting path");
|
||||||
let path = dijkstra.path(goal);
|
let path = dijkstra.path(goal);
|
||||||
let cost = dijkstra.cost(goal);
|
let cost = dijkstra.cost(goal);
|
||||||
|
|
||||||
|
println!();
|
||||||
println!("Path found (cost {cost}, length {}):", path.len());
|
println!("Path found (cost {cost}, length {}):", path.len());
|
||||||
for page in path {
|
for page in path {
|
||||||
let info = &pages[page.usize()];
|
let info = &pages[page.usize()];
|
||||||
|
|
@ -75,3 +58,4 @@ pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ use clap::Parser;
|
||||||
#[derive(Debug, Parser)]
|
#[derive(Debug, Parser)]
|
||||||
enum Command {
|
enum Command {
|
||||||
Ingest(commands::ingest::Cmd),
|
Ingest(commands::ingest::Cmd),
|
||||||
|
Path(commands::path::Cmd),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Parser)]
|
#[derive(Debug, Parser)]
|
||||||
|
|
@ -24,5 +25,6 @@ fn main() -> io::Result<()> {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
match args.command {
|
match args.command {
|
||||||
Command::Ingest(cmd) => cmd.run(&args.datafile),
|
Command::Ingest(cmd) => cmd.run(&args.datafile),
|
||||||
|
Command::Path(cmd) => cmd.run(&args.datafile),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,13 @@
|
||||||
use std::{fmt, iter, time::Instant};
|
use std::{fmt, iter, thread::panicking, time::Instant};
|
||||||
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use thousands::Separable;
|
use thousands::Separable;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
data::Page,
|
||||||
|
graph::{Graph, NodeIdx},
|
||||||
|
};
|
||||||
|
|
||||||
pub struct Counter {
|
pub struct Counter {
|
||||||
n: usize,
|
n: usize,
|
||||||
last_print: Instant,
|
last_print: Instant,
|
||||||
|
|
@ -145,27 +150,34 @@ impl TitleNormalizer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
pub fn locate_title(normalizer: &TitleNormalizer, pages: &[Page], title: &str) -> NodeIdx {
|
||||||
pub fn find_index_of_title(pages: &[Page<PageInfo>], title: &str) -> u32 {
|
let normalized = normalizer.normalize(title);
|
||||||
let title = normalize_link(title);
|
|
||||||
pages
|
pages
|
||||||
.iter()
|
.iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.find(|(_, p)| normalize_link(&p.data.title) == title)
|
.find(|(_, p)| normalizer.normalize(&p.title) == normalized)
|
||||||
.map(|(i, _)| i)
|
.map(|(i, _)| NodeIdx::new(i))
|
||||||
.expect("invalid title") as u32
|
.expect("invalid title")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_redirects(data: &AdjacencyList<PageInfo, LinkInfo>, mut page_idx: u32) -> u32 {
|
pub fn resolve_redirects(pages: &[Page], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
|
||||||
loop {
|
loop {
|
||||||
if data.page(page_idx).data.redirect {
|
if pages[page.usize()].redirect {
|
||||||
if let Some(link_idx) = data.link_redirect(page_idx) {
|
if let Some(target) = graph.edges_for(page).first() {
|
||||||
page_idx = data.link(link_idx).to;
|
page = *target;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return page_idx;
|
return page;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
pub fn resolve_title(
|
||||||
|
normalizer: &TitleNormalizer,
|
||||||
|
pages: &[Page],
|
||||||
|
graph: &Graph,
|
||||||
|
title: &str,
|
||||||
|
) -> NodeIdx {
|
||||||
|
resolve_redirects(pages, graph, locate_title(normalizer, pages, title))
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue