Port path command

This commit is contained in:
Joscha 2024-12-31 02:48:28 +01:00
parent abd6b3519c
commit 4e41084f2a
4 changed files with 77 additions and 78 deletions

View file

@ -1 +1,2 @@
pub mod ingest;
pub mod path;

View file

@ -1,49 +1,30 @@
use std::{
fs::File,
io::{self, BufReader},
path::Path,
};
use std::{io, path::Path};
use crate::{
algo::Dijkstra,
data::{info::PageInfo, store},
graph::{Graph, NodeIdx},
util,
data,
util::{self, TitleNormalizer},
};
pub fn find_index_of_title(pages: &[PageInfo], title: &str) -> NodeIdx {
let title = util::normalize_link(title);
pages
.iter()
.enumerate()
.find(|(_, p)| util::normalize_link(&p.title) == title)
.map(|(i, _)| NodeIdx::new(i))
.expect("invalid title")
/// Find the shortest path between two articles.
// NOTE: field notes below are plain `//` comments on purpose — with clap's
// derive, `///` doc comments become part of the generated help output.
#[derive(Debug, clap::Parser)]
pub struct Cmd {
// Title of the article the search starts from. `run` passes it to
// `util::resolve_title` to obtain the corresponding node.
start: String,
// Title of the article the search should reach. Resolved the same way as
// `start`.
goal: String,
}
pub fn resolve_redirects(pages: &[PageInfo], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
loop {
if pages[page.usize()].redirect {
if let Some(next) = graph.edges_for(page).first() {
page = *next;
continue;
}
}
impl Cmd {
pub fn run(self, data: &Path) -> io::Result<()> {
let normalizer = TitleNormalizer::new();
return page;
}
}
pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
println!(">> Import");
let mut databuf = BufReader::new(File::open(datafile)?);
let (pages, _links, graph) = store::read_graph(&mut databuf)?;
let (pages, _links, graph) = data::read_from_file(data)?;
println!(">> Locate from and to");
let start = resolve_redirects(&pages, &graph, find_index_of_title(&pages, start));
let goal = resolve_redirects(&pages, &graph, find_index_of_title(&pages, goal));
println!("Start: {:?}", pages[start.usize()].title);
println!("Goal: {:?}", pages[goal.usize()].title);
println!(">> Resolve articles");
let start = util::resolve_title(&normalizer, &pages, &graph, &self.start);
let goal = util::resolve_title(&normalizer, &pages, &graph, &self.goal);
println!("Start: {}", pages[start.usize()].title);
println!("Goal: {}", pages[goal.usize()].title);
println!(">> Find path");
println!("> Preparing dijkstra");
@ -63,6 +44,8 @@ pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
println!("> Collecting path");
let path = dijkstra.path(goal);
let cost = dijkstra.cost(goal);
println!();
println!("Path found (cost {cost}, length {}):", path.len());
for page in path {
let info = &pages[page.usize()];
@ -75,3 +58,4 @@ pub fn path(datafile: &Path, start: &str, goal: &str) -> io::Result<()> {
Ok(())
}
}

View file

@ -11,6 +11,7 @@ use clap::Parser;
// The set of subcommands. Each variant wraps the argument struct (`Cmd`) of one
// module under `commands`; `main` matches on the parsed variant and calls its
// `run` method with the data file path.
// NOTE: plain `//` comments are used on purpose — with clap's derive, `///`
// doc comments become part of the generated help output.
#[derive(Debug, Parser)]
enum Command {
// Arguments of the ingest command (`commands::ingest`).
Ingest(commands::ingest::Cmd),
// Arguments of the path command (`commands::path`), which finds the shortest
// path between two articles.
Path(commands::path::Cmd),
}
#[derive(Debug, Parser)]
@ -24,5 +25,6 @@ fn main() -> io::Result<()> {
let args = Args::parse();
match args.command {
Command::Ingest(cmd) => cmd.run(&args.datafile),
Command::Path(cmd) => cmd.run(&args.datafile),
}
}

View file

@ -1,8 +1,13 @@
use std::{fmt, iter, time::Instant};
use std::{fmt, iter, thread::panicking, time::Instant};
use regex::Regex;
use thousands::Separable;
use crate::{
data::Page,
graph::{Graph, NodeIdx},
};
pub struct Counter {
n: usize,
last_print: Instant,
@ -145,27 +150,34 @@ impl TitleNormalizer {
}
}
/*
pub fn find_index_of_title(pages: &[Page<PageInfo>], title: &str) -> u32 {
let title = normalize_link(title);
pub fn locate_title(normalizer: &TitleNormalizer, pages: &[Page], title: &str) -> NodeIdx {
let normalized = normalizer.normalize(title);
pages
.iter()
.enumerate()
.find(|(_, p)| normalize_link(&p.data.title) == title)
.map(|(i, _)| i)
.expect("invalid title") as u32
.find(|(_, p)| normalizer.normalize(&p.title) == normalized)
.map(|(i, _)| NodeIdx::new(i))
.expect("invalid title")
}
pub fn resolve_redirects(data: &AdjacencyList<PageInfo, LinkInfo>, mut page_idx: u32) -> u32 {
pub fn resolve_redirects(pages: &[Page], graph: &Graph, mut page: NodeIdx) -> NodeIdx {
loop {
if data.page(page_idx).data.redirect {
if let Some(link_idx) = data.link_redirect(page_idx) {
page_idx = data.link(link_idx).to;
if pages[page.usize()].redirect {
if let Some(target) = graph.edges_for(page).first() {
page = *target;
continue;
}
}
return page_idx;
return page;
}
}
*/
/// Turn an article title into the node it ultimately refers to.
///
/// This is a two-step convenience wrapper: the title is first looked up with
/// [`locate_title`], and the node found there is then handed to
/// [`resolve_redirects`] so that a redirect page is replaced by its target.
///
/// # Panics
///
/// Panics with `"invalid title"` (raised inside [`locate_title`]) when no page
/// matches the normalized title.
pub fn resolve_title(
    normalizer: &TitleNormalizer,
    pages: &[Page],
    graph: &Graph,
    title: &str,
) -> NodeIdx {
    // Step 1: find the page whose normalized title matches.
    let located = locate_title(normalizer, pages, title);
    // Step 2: follow redirects starting from that page.
    resolve_redirects(pages, graph, located)
}