Port path command

This commit is contained in:
Joscha 2024-12-31 02:48:28 +01:00
parent abd6b3519c
commit 4e41084f2a
4 changed files with 77 additions and 78 deletions

View file

@ -1 +1,2 @@
pub mod ingest; pub mod ingest;
pub mod path;

View file

@ -1,49 +1,30 @@
use std::{ use std::{io, path::Path};
fs::File,
io::{self, BufReader},
path::Path,
};
use crate::{ use crate::{
algo::Dijkstra, algo::Dijkstra,
data::{info::PageInfo, store}, data,
graph::{Graph, NodeIdx}, util::{self, TitleNormalizer},
util,
}; };
pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx { /// Find the shortest path between two articles.
let title = util::normalize_link(title); #[derive(Debug, clap::Parser)]
pages pub struct Cmd {
.iter() start: String,
.enumerate() goal: String,
.find(|(_, p)| util::normalize_link(&p.title) == title)
.map(|(i, _)| NodeIdx::new(i))
.expect("invalid title")
} }
pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx { impl Cmd {
loop { pub fn run(self, data: &Path) -> io::Result<()> {
if pages[page.usize()].redirect { let normalizer = TitleNormalizer::new();
if let Some(next) = graph.edges_for(page).first() {
page = *next;
continue;
}
}
return page;
}
}
pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
println!(">> Import"); println!(">> Import");
let mut databuf = BufReader::new(File::open(datafile)?); let (pages, _links, graph) = data::read_from_file(data)?;
let (pages, _links, graph) = store::read_graph(&mut databuf)?;
println!(">> Locate from and to"); println!(">> Resolve articles");
let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start)); let start = util::resolve_title(&normalizer, &pages, &graph, &self.start);
let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal)); let goal = util::resolve_title(&normalizer, &pages, &graph, &self.goal);
println!("Start: {:?}", pages[start.usize()].title); println!("Start: {}", pages[start.usize()].title);
println!("Goal: {:?}", pages[goal.usize()].title); println!("Goal: {}", pages[goal.usize()].title);
println!(">> Find path"); println!(">> Find path");
println!("> Preparing dijkstra"); println!("> Preparing dijkstra");
@ -63,15 +44,18 @@ pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
println!("> Collecting path"); println!("> Collecting path");
let path = dijkstra.path(goal); let path = dijkstra.path(goal);
let cost = dijkstra.cost(goal); let cost = dijkstra.cost(goal);
println!();
println!("Path found (cost {cost}, length {}):", path.len()); println!("Path found (cost {cost}, length {}):", path.len());
for page in path { for page in path {
let info = &pages[page.usize()]; let info = &pages[page.usize()];
if info.redirect { if info.redirect {
println!(" v {:?}", info.title); println!("v {:?}", info.title);
} else { } else {
println!(" - {:?}", info.title); println!("- {:?}", info.title);
} }
} }
Ok(()) Ok(())
}
} }

View file

@ -11,6 +11,7 @@ use clap::Parser;
#[derive(Debug, Parser)] #[derive(Debug, Parser)]
enum Command { enum Command {
Ingest(commands::ingest::Cmd), Ingest(commands::ingest::Cmd),
Path(commands::path::Cmd),
} }
#[derive(Debug, Parser)] #[derive(Debug, Parser)]
@ -24,5 +25,6 @@ fn main() -> io::Result<()> {
let args = Args::parse(); let args = Args::parse();
match args.command { match args.command {
Command::Ingest(cmd) => cmd.run(&args.datafile), Command::Ingest(cmd) => cmd.run(&args.datafile),
Command::Path(cmd) => cmd.run(&args.datafile),
} }
} }

View file

@ -1,8 +1,13 @@
use std::{fmt, iter, time::Instant}; use std::{fmt, iter, thread::panicking, time::Instant};
use regex::Regex; use regex::Regex;
use thousands::Separable; use thousands::Separable;
use crate::{
data::Page,
graph::{Graph, NodeIdx},
};
pub struct Counter { pub struct Counter {
n: usize, n: usize,
last_print: Instant, last_print: Instant,
@ -145,27 +150,34 @@ impl TitleNormalizer {
} }
} }
/* pub fn locate_title(normalizer: &TitleNormalizer, pages: &[Page], title: &str) -> NodeIdx {
pub fn find_index_of_title(pages: &[Page<PageInfo>], title: &str) -> u32 { let normalized = normalizer.normalize(title);
let title = normalize_link(title);
pages pages
.iter() .iter()
.enumerate() .enumerate()
.find(|(_, p)| normalize_link(&p.data.title) == title) .find(|(_, p)| normalizer.normalize(&p.title) == normalized)
.map(|(i, _)| i) .map(|(i, _)| NodeIdx::new(i))
.expect("invalid title") as u32 .expect("invalid title")
} }
pub fn resolve_redirects(data: &AdjacencyList<PageInfo, LinkInfo>, mut page_idx: u32) -> u32 { pub fn resolve_redirects(pages: &[Page], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
loop { loop {
if data.page(page_idx).data.redirect { if pages[page.usize()].redirect {
if let Some(link_idx) = data.link_redirect(page_idx) { if let Some(target) = graph.edges_for(page).first() {
page_idx = data.link(link_idx).to; page = *target;
continue; continue;
} }
} }
return page_idx; return page;
} }
} }
*/
/// Resolve an article title to the node it ultimately refers to.
///
/// The title is first looked up among `pages` with [`locate_title`] (using
/// `normalizer` for the comparison); the node found is then handed to
/// [`resolve_redirects`], which follows redirect pages through `graph`.
///
/// # Panics
///
/// Panics with `"invalid title"` if [`locate_title`] finds no matching page.
pub fn resolve_title(
    normalizer: &TitleNormalizer,
    pages: &[Page],
    graph: &Graph,
    title: &str,
) -> NodeIdx {
    // Step 1: find the page whose normalized title matches the request.
    let located = locate_title(normalizer, pages, title);
    // Step 2: chase redirects starting from that page.
    resolve_redirects(pages, graph, located)
}