Add some old files

This commit is contained in:
Joscha 2025-08-28 18:55:37 +02:00
parent 27416cf782
commit d85b61d419
3 changed files with 68 additions and 7 deletions

View file

@ -1,15 +1,59 @@
use std::fs::File; use std::collections::{HashMap, HashSet};
use std::fs::{self, File};
use std::io::{self, BufReader, BufWriter}; use std::io::{self, BufReader, BufWriter};
use std::path::Path; use std::path::{Path, PathBuf};
use serde::Deserialize;
use crate::data::adjacency_list::AdjacencyList; use crate::data::adjacency_list::AdjacencyList;
use crate::data::info::{LinkInfo, PageInfo};
use crate::data::store; use crate::data::store;
use crate::util;
/// One entry of the filter file passed to `reexport`.
///
/// The filter file is read as a JSON array of these entries; unknown layout
/// beyond the two fields below is not relied upon here.
#[derive(Deserialize)]
struct FilterFile {
/// Page title; it is passed through `util::normalize_link` before being
/// compared against page titles.
title: String,
/// Language tag of the entry; only entries equal to `"en"` are used.
language: String,
}
/// Builds a copy of `data` restricted to the pages listed in `keep`.
///
/// A page is retained when its title, after `util::normalize_link`, is a
/// member of `keep`. Retained pages keep their relative order and are
/// renumbered consecutively from zero. A link is copied only when its target
/// page is retained too, and its target is rewritten to the new index.
///
/// # Panics
///
/// Panics if `push_page` hands out an index different from the one that was
/// precomputed for the page.
fn filter_pages(
    data: &AdjacencyList<PageInfo, LinkInfo>,
    keep: HashSet<String>,
) -> AdjacencyList<PageInfo, LinkInfo> {
    // Old index -> new index, for retained pages only. The new index is the
    // page's position among the retained pages, which has to be known up
    // front because links may point at pages that appear later.
    let index_map: HashMap<_, _> = data
        .pages()
        .filter(|(_, page)| keep.contains(&util::normalize_link(&page.data.title)))
        .enumerate()
        .map(|(position, (old_idx, _))| (old_idx, position as u32))
        .collect();

    // Second pass: emit the retained pages together with their surviving links.
    let mut result = AdjacencyList::default();
    for (page_idx, page) in data.pages() {
        if let Some(new_idx) = index_map.get(&page_idx) {
            let actual_new_idx = result.push_page(page.data.clone());
            assert!(*new_idx == actual_new_idx);

            for (_, link) in data.links(page_idx) {
                // Links whose target was filtered out are dropped.
                let Some(to) = index_map.get(&link.to) else {
                    continue;
                };
                result.push_link(*to, link.data);
            }
        }
    }

    result
}
pub fn reexport( pub fn reexport(
from: &Path, from: &Path,
to: &Path, to: &Path,
in_parens: Option<bool>, in_parens: Option<bool>,
in_structure: Option<bool>, in_structure: Option<bool>,
filter: Option<PathBuf>,
) -> io::Result<()> { ) -> io::Result<()> {
eprintln!(">> Import"); eprintln!(">> Import");
let mut from = BufReader::new(File::open(from)?); let mut from = BufReader::new(File::open(from)?);
@ -18,7 +62,7 @@ pub fn reexport(
eprintln!(">> Consistency check"); eprintln!(">> Consistency check");
data.check_consistency(); data.check_consistency();
if in_parens.is_some() || in_structure.is_some() { if in_parens.is_some() || in_structure.is_some() || filter.is_some() {
eprintln!(">> Filtering"); eprintln!(">> Filtering");
let mut data2 = AdjacencyList::default(); let mut data2 = AdjacencyList::default();
@ -38,6 +82,18 @@ pub fn reexport(
} }
data = data2; data = data2;
if let Some(filter) = filter {
let filter = fs::read_to_string(filter)?;
let filter = serde_json::from_str::<Vec<FilterFile>>(&filter).unwrap();
let keep = filter
.into_iter()
.filter(|f| f.language == "en")
.map(|f| f.title)
.map(|t| util::normalize_link(&t))
.collect::<HashSet<_>>();
data = filter_pages(&data, keep);
}
} }
eprintln!(">> Export"); eprintln!(">> Export");

View file

@ -49,15 +49,17 @@ impl<P, L> Default for AdjacencyList<P, L> {
} }
impl<P, L> AdjacencyList<P, L> { impl<P, L> AdjacencyList<P, L> {
pub fn push_page(&mut self, data: P) { pub fn push_page(&mut self, data: P) -> u32 {
self.pages.push(Page { self.pages.push(Page {
start: self.links.len() as u32, start: self.links.len() as u32,
data, data,
}); });
self.pages.len() as u32 - 1
} }
pub fn push_link(&mut self, to: u32, data: L) { pub fn push_link(&mut self, to: u32, data: L) -> u32 {
self.links.push(Link { to, data }) self.links.push(Link { to, data });
self.links.len() as u32 - 1
} }
pub fn page(&self, page_idx: u32) -> &Page<P> { pub fn page(&self, page_idx: u32) -> &Page<P> {

View file

@ -26,6 +26,8 @@ enum Command {
in_parens: Option<bool>, in_parens: Option<bool>,
#[arg(long, short = 'S')] #[arg(long, short = 'S')]
in_structure: Option<bool>, in_structure: Option<bool>,
#[arg(long, short = 'F')]
filter: Option<PathBuf>,
}, },
/// Find a path from one article to another. /// Find a path from one article to another.
Path { Path {
@ -61,7 +63,8 @@ fn main() -> io::Result<()> {
to, to,
in_parens, in_parens,
in_structure, in_structure,
} => commands::reexport::reexport(&args.datafile, &to, in_parens, in_structure), filter,
} => commands::reexport::reexport(&args.datafile, &to, in_parens, in_structure, filter),
Command::Path { from, to, flip } => { Command::Path { from, to, flip } => {
if flip { if flip {
commands::path::path(&args.datafile, &to, &from) commands::path::path(&args.datafile, &to, &from)