Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
d85b61d419 Add some old files 2025-08-28 18:55:37 +02:00
3 changed files with 68 additions and 7 deletions

View file

@ -1,15 +1,59 @@
use std::fs::File;
use std::collections::{HashMap, HashSet};
use std::fs::{self, File};
use std::io::{self, BufReader, BufWriter};
use std::path::Path;
use std::path::{Path, PathBuf};
use serde::Deserialize;
use crate::data::adjacency_list::AdjacencyList;
use crate::data::info::{LinkInfo, PageInfo};
use crate::data::store;
use crate::util;
#[derive(Deserialize)]
struct FilterFile {
title: String,
language: String,
}
/// Builds a new adjacency list containing only the pages whose normalized
/// title appears in `keep`, along with the links between surviving pages.
/// Page indices are compacted into a dense `0..n` range; links pointing at
/// dropped pages are discarded.
fn filter_pages(
    data: &AdjacencyList<PageInfo, LinkInfo>,
    keep: HashSet<String>,
) -> AdjacencyList<PageInfo, LinkInfo> {
    // First pass: assign each surviving page a dense new index.
    // Pages absent from this map are the ones being dropped.
    let mut old_to_new = HashMap::new();
    for (old_idx, page) in data.pages() {
        if keep.contains(&util::normalize_link(&page.data.title)) {
            old_to_new.insert(old_idx, old_to_new.len() as u32);
        }
    }

    // Second pass: copy surviving pages and their surviving links over.
    let mut filtered = AdjacencyList::default();
    for (old_idx, page) in data.pages() {
        let new_idx = match old_to_new.get(&old_idx) {
            Some(&idx) => idx,
            None => continue,
        };
        // push_page must hand back exactly the index we precomputed,
        // otherwise the link targets remapped below would be wrong.
        let pushed_idx = filtered.push_page(page.data.clone());
        assert!(new_idx == pushed_idx);
        for (_, link) in data.links(old_idx) {
            // Keep the link only if its target page also survived.
            if let Some(&target) = old_to_new.get(&link.to) {
                filtered.push_link(target, link.data);
            }
        }
    }
    filtered
}
pub fn reexport(
from: &Path,
to: &Path,
in_parens: Option<bool>,
in_structure: Option<bool>,
filter: Option<PathBuf>,
) -> io::Result<()> {
eprintln!(">> Import");
let mut from = BufReader::new(File::open(from)?);
@ -18,7 +62,7 @@ pub fn reexport(
eprintln!(">> Consistency check");
data.check_consistency();
if in_parens.is_some() || in_structure.is_some() {
if in_parens.is_some() || in_structure.is_some() || filter.is_some() {
eprintln!(">> Filtering");
let mut data2 = AdjacencyList::default();
@ -38,6 +82,18 @@ pub fn reexport(
}
data = data2;
if let Some(filter) = filter {
let filter = fs::read_to_string(filter)?;
let filter = serde_json::from_str::<Vec<FilterFile>>(&filter).unwrap();
let keep = filter
.into_iter()
.filter(|f| f.language == "en")
.map(|f| f.title)
.map(|t| util::normalize_link(&t))
.collect::<HashSet<_>>();
data = filter_pages(&data, keep);
}
}
eprintln!(">> Export");

View file

@ -49,15 +49,17 @@ impl<P, L> Default for AdjacencyList<P, L> {
}
impl<P, L> AdjacencyList<P, L> {
pub fn push_page(&mut self, data: P) {
pub fn push_page(&mut self, data: P) -> u32 {
self.pages.push(Page {
start: self.links.len() as u32,
data,
});
self.pages.len() as u32 - 1
}
pub fn push_link(&mut self, to: u32, data: L) {
self.links.push(Link { to, data })
pub fn push_link(&mut self, to: u32, data: L) -> u32 {
self.links.push(Link { to, data });
self.links.len() as u32 - 1
}
pub fn page(&self, page_idx: u32) -> &Page<P> {

View file

@ -26,6 +26,8 @@ enum Command {
in_parens: Option<bool>,
#[arg(long, short = 'S')]
in_structure: Option<bool>,
#[arg(long, short = 'F')]
filter: Option<PathBuf>,
},
/// Find a path from one article to another.
Path {
@ -61,7 +63,8 @@ fn main() -> io::Result<()> {
to,
in_parens,
in_structure,
} => commands::reexport::reexport(&args.datafile, &to, in_parens, in_structure),
filter,
} => commands::reexport::reexport(&args.datafile, &to, in_parens, in_structure, filter),
Command::Path { from, to, flip } => {
if flip {
commands::path::path(&args.datafile, &to, &from)