Allow transforming graph before commands
This commit is contained in:
parent
ab7b7295ca
commit
c573f1b0b0
8 changed files with 134 additions and 17 deletions
|
|
@ -1,3 +1,4 @@
|
|||
mod dijkstra;
|
||||
mod edit;
|
||||
|
||||
pub use self::dijkstra::*;
|
||||
pub use self::{dijkstra::*, edit::*};
|
||||
|
|
|
|||
74
brood/src/algo/edit.rs
Normal file
74
brood/src/algo/edit.rs
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
use std::mem;
|
||||
|
||||
use crate::{
|
||||
data::{Data, Link},
|
||||
graph::NodeIdx,
|
||||
util,
|
||||
};
|
||||
|
||||
pub fn retain_edges(data: &mut Data, f: impl Fn(&Link) -> bool) {
|
||||
let mut links = mem::take(&mut data.links).into_iter();
|
||||
let graph = mem::take(&mut data.graph);
|
||||
|
||||
for node in graph.nodes() {
|
||||
data.graph.add_node();
|
||||
|
||||
for edge in graph.edge_slice(node) {
|
||||
let link = links.next().unwrap();
|
||||
if f(&link) {
|
||||
data.links.push(link);
|
||||
data.graph.add_edge(*edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn resolve_redirects(data: &mut Data) {
|
||||
// Permutation from input node to input node
|
||||
let mut perm_redirect = vec![NodeIdx::NONE; data.pages.len()];
|
||||
for node in data.graph.nodes() {
|
||||
perm_redirect[node.usize()] = util::resolve_redirects(data, node);
|
||||
}
|
||||
|
||||
// Permutation from input node to final node
|
||||
let mut perm_retain = vec![NodeIdx::NONE; data.pages.len()];
|
||||
let mut perm_retain_count = NodeIdx(0);
|
||||
for (i, page) in data.pages.iter().enumerate() {
|
||||
if !page.redirect {
|
||||
perm_retain[i] = perm_retain_count;
|
||||
perm_retain_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut pages = mem::take(&mut data.pages).into_iter();
|
||||
let mut links = mem::take(&mut data.links).into_iter();
|
||||
let graph = mem::take(&mut data.graph);
|
||||
|
||||
for node in graph.nodes() {
|
||||
let page = pages.next().unwrap();
|
||||
let new_node = perm_retain[node.usize()];
|
||||
|
||||
if new_node == NodeIdx::NONE {
|
||||
// Skip all edges
|
||||
for _ in graph.edge_slice(node) {
|
||||
links.next().unwrap();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
data.pages.push(page);
|
||||
data.graph.add_node();
|
||||
|
||||
for edge in graph.edge_slice(node) {
|
||||
let link = links.next().unwrap();
|
||||
let new_edge = perm_retain[perm_redirect[edge.usize()].usize()];
|
||||
|
||||
if new_edge == NodeIdx::NONE {
|
||||
continue;
|
||||
}
|
||||
|
||||
data.links.push(link);
|
||||
data.graph.add_edge(new_edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -121,7 +121,7 @@ fn read_page_data(
|
|||
|
||||
for (target, start, len, flags) in page_links {
|
||||
if let Some((_, brood_i)) = title_lookup.get(&normalizer.normalize(&target)) {
|
||||
data.graph.edges.push(NodeIdx(*brood_i));
|
||||
data.graph.add_edge(NodeIdx(*brood_i));
|
||||
data.links.push(Link { start, len, flags });
|
||||
}
|
||||
}
|
||||
|
|
@ -139,7 +139,7 @@ pub struct Cmd {
|
|||
}
|
||||
|
||||
impl Cmd {
|
||||
pub fn run(self, brood_data: &Path) -> io::Result<()> {
|
||||
pub fn run(&self, brood_data: &Path) -> io::Result<()> {
|
||||
let normalizer = TitleNormalizer::new();
|
||||
|
||||
println!(">> First pass");
|
||||
|
|
@ -162,7 +162,7 @@ impl Cmd {
|
|||
drop(sift_data); // No longer needed
|
||||
|
||||
println!("> Checking consistency");
|
||||
data.graph.check_consistency();
|
||||
data.check_consistency();
|
||||
|
||||
println!(">> Export");
|
||||
println!(
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use std::{io, path::Path};
|
||||
use std::io;
|
||||
|
||||
use crate::{
|
||||
algo::Dijkstra,
|
||||
|
|
@ -14,12 +14,9 @@ pub struct Cmd {
|
|||
}
|
||||
|
||||
impl Cmd {
|
||||
pub fn run(self, data: &Path) -> io::Result<()> {
|
||||
pub fn run(self, data: Data) -> io::Result<()> {
|
||||
let normalizer = TitleNormalizer::new();
|
||||
|
||||
println!(">> Import");
|
||||
let data = Data::read_from_file(data)?;
|
||||
|
||||
println!(">> Resolve articles");
|
||||
let start = util::resolve_title(&normalizer, &data, &self.start);
|
||||
let goal = util::resolve_title(&normalizer, &data, &self.goal);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use std::{collections::HashSet, io, path::Path};
|
||||
use std::{collections::HashSet, io};
|
||||
|
||||
use thousands::Separable;
|
||||
|
||||
|
|
@ -18,12 +18,9 @@ pub struct Cmd {
|
|||
}
|
||||
|
||||
impl Cmd {
|
||||
pub fn run(self, data: &Path) -> io::Result<()> {
|
||||
pub fn run(self, data: Data) -> io::Result<()> {
|
||||
let normalizer = TitleNormalizer::new();
|
||||
|
||||
println!(">> Import");
|
||||
let data = Data::read_from_file(data)?;
|
||||
|
||||
println!(">> Locate article");
|
||||
let mut node = util::locate_title(&normalizer, &data, &self.title);
|
||||
|
||||
|
|
|
|||
|
|
@ -192,6 +192,22 @@ impl Data {
|
|||
Self::read(&mut file)
|
||||
}
|
||||
|
||||
pub fn check_consistency(&self) {
|
||||
assert_eq!(
|
||||
self.pages.len(),
|
||||
self.graph.nodes.len(),
|
||||
"inconsistent number of pages"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
self.links.len(),
|
||||
self.graph.edges.len(),
|
||||
"inconsistent number of links"
|
||||
);
|
||||
|
||||
self.graph.check_consistency();
|
||||
}
|
||||
|
||||
pub fn redirect_target(&self, node: NodeIdx) -> Option<NodeIdx> {
|
||||
if !self.pages[node.usize()].redirect {
|
||||
return None;
|
||||
|
|
|
|||
|
|
@ -194,6 +194,10 @@ impl Graph {
|
|||
self.nodes.push(EdgeIdx::new(self.edges.len()));
|
||||
}
|
||||
|
||||
/// Appends an edge pointing at `target` to the end of the edge list.
///
/// `add_node` records the current edge count as a node's start offset, so
/// the new edge falls into the range of the most recently added node.
pub fn add_edge(&mut self, target: NodeIdx) {
    self.edges.push(target);
}
|
||||
|
||||
pub fn check_consistency(&self) {
|
||||
if self.nodes.is_empty() {
|
||||
assert!(self.edges.is_empty(), "edges must belong to existing nodes");
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ mod util;
|
|||
use std::{io, path::PathBuf};
|
||||
|
||||
use clap::Parser;
|
||||
use data::Data;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
enum Command {
|
||||
|
|
@ -20,13 +21,40 @@ struct Args {
|
|||
datafile: PathBuf,
|
||||
#[command(subcommand)]
|
||||
command: Command,
|
||||
#[arg(long, short = 'P')]
|
||||
in_parens: Option<bool>,
|
||||
#[arg(long, short = 'S')]
|
||||
in_structure: Option<bool>,
|
||||
#[arg(long, short = 'R')]
|
||||
resolve_redirects: bool,
|
||||
}
|
||||
|
||||
fn main() -> io::Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
if let Command::Ingest(cmd) = &args.command {
|
||||
return cmd.run(&args.datafile);
|
||||
}
|
||||
|
||||
println!(">> Import");
|
||||
let mut data = Data::read_from_file(&args.datafile)?;
|
||||
|
||||
if args.in_parens.is_some() || args.in_structure.is_some() {
|
||||
println!("> Filtering edges");
|
||||
algo::retain_edges(&mut data, |link| {
|
||||
args.in_parens.is_none_or(|b| b == link.in_parens())
|
||||
&& args.in_structure.is_none_or(|b| b == link.in_structure())
|
||||
});
|
||||
}
|
||||
|
||||
if args.resolve_redirects {
|
||||
println!("> Resolving redirects");
|
||||
algo::resolve_redirects(&mut data);
|
||||
}
|
||||
|
||||
match args.command {
|
||||
Command::Ingest(cmd) => cmd.run(&args.datafile),
|
||||
Command::Show(cmd) => cmd.run(&args.datafile),
|
||||
Command::Path(cmd) => cmd.run(&args.datafile),
|
||||
Command::Ingest(_) => unreachable!(),
|
||||
Command::Show(cmd) => cmd.run(data),
|
||||
Command::Path(cmd) => cmd.run(data),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue