Add some old files
This commit is contained in:
parent
27416cf782
commit
d85b61d419
3 changed files with 68 additions and 7 deletions
|
|
@ -1,15 +1,59 @@
|
||||||
use std::fs::File;
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::fs::{self, File};
|
||||||
use std::io::{self, BufReader, BufWriter};
|
use std::io::{self, BufReader, BufWriter};
|
||||||
use std::path::Path;
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
use crate::data::adjacency_list::AdjacencyList;
|
use crate::data::adjacency_list::AdjacencyList;
|
||||||
|
use crate::data::info::{LinkInfo, PageInfo};
|
||||||
use crate::data::store;
|
use crate::data::store;
|
||||||
|
use crate::util;
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct FilterFile {
|
||||||
|
title: String,
|
||||||
|
language: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn filter_pages(
|
||||||
|
data: &AdjacencyList<PageInfo, LinkInfo>,
|
||||||
|
keep: HashSet<String>,
|
||||||
|
) -> AdjacencyList<PageInfo, LinkInfo> {
|
||||||
|
// Map from old to new indices. Only contains entries for pages to keep.
|
||||||
|
let mut index_map = HashMap::new();
|
||||||
|
for (page_idx, page) in data.pages() {
|
||||||
|
if keep.contains(&util::normalize_link(&page.data.title)) {
|
||||||
|
index_map.insert(page_idx, index_map.len() as u32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create new adjacency list in a single pass
|
||||||
|
let mut result = AdjacencyList::default();
|
||||||
|
for (page_idx, page) in data.pages() {
|
||||||
|
let Some(new_idx) = index_map.get(&page_idx) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
let actual_new_idx = result.push_page(page.data.clone());
|
||||||
|
assert!(*new_idx == actual_new_idx);
|
||||||
|
|
||||||
|
for (_, link) in data.links(page_idx) {
|
||||||
|
if let Some(to) = index_map.get(&link.to) {
|
||||||
|
result.push_link(*to, link.data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
pub fn reexport(
|
pub fn reexport(
|
||||||
from: &Path,
|
from: &Path,
|
||||||
to: &Path,
|
to: &Path,
|
||||||
in_parens: Option<bool>,
|
in_parens: Option<bool>,
|
||||||
in_structure: Option<bool>,
|
in_structure: Option<bool>,
|
||||||
|
filter: Option<PathBuf>,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
eprintln!(">> Import");
|
eprintln!(">> Import");
|
||||||
let mut from = BufReader::new(File::open(from)?);
|
let mut from = BufReader::new(File::open(from)?);
|
||||||
|
|
@ -18,7 +62,7 @@ pub fn reexport(
|
||||||
eprintln!(">> Consistency check");
|
eprintln!(">> Consistency check");
|
||||||
data.check_consistency();
|
data.check_consistency();
|
||||||
|
|
||||||
if in_parens.is_some() || in_structure.is_some() {
|
if in_parens.is_some() || in_structure.is_some() || filter.is_some() {
|
||||||
eprintln!(">> Filtering");
|
eprintln!(">> Filtering");
|
||||||
|
|
||||||
let mut data2 = AdjacencyList::default();
|
let mut data2 = AdjacencyList::default();
|
||||||
|
|
@ -38,6 +82,18 @@ pub fn reexport(
|
||||||
}
|
}
|
||||||
|
|
||||||
data = data2;
|
data = data2;
|
||||||
|
|
||||||
|
if let Some(filter) = filter {
|
||||||
|
let filter = fs::read_to_string(filter)?;
|
||||||
|
let filter = serde_json::from_str::<Vec<FilterFile>>(&filter).unwrap();
|
||||||
|
let keep = filter
|
||||||
|
.into_iter()
|
||||||
|
.filter(|f| f.language == "en")
|
||||||
|
.map(|f| f.title)
|
||||||
|
.map(|t| util::normalize_link(&t))
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
data = filter_pages(&data, keep);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!(">> Export");
|
eprintln!(">> Export");
|
||||||
|
|
|
||||||
|
|
@ -49,15 +49,17 @@ impl<P, L> Default for AdjacencyList<P, L> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<P, L> AdjacencyList<P, L> {
|
impl<P, L> AdjacencyList<P, L> {
|
||||||
pub fn push_page(&mut self, data: P) {
|
pub fn push_page(&mut self, data: P) -> u32 {
|
||||||
self.pages.push(Page {
|
self.pages.push(Page {
|
||||||
start: self.links.len() as u32,
|
start: self.links.len() as u32,
|
||||||
data,
|
data,
|
||||||
});
|
});
|
||||||
|
self.pages.len() as u32 - 1
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn push_link(&mut self, to: u32, data: L) {
|
pub fn push_link(&mut self, to: u32, data: L) -> u32 {
|
||||||
self.links.push(Link { to, data })
|
self.links.push(Link { to, data });
|
||||||
|
self.links.len() as u32 - 1
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn page(&self, page_idx: u32) -> &Page<P> {
|
pub fn page(&self, page_idx: u32) -> &Page<P> {
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,8 @@ enum Command {
|
||||||
in_parens: Option<bool>,
|
in_parens: Option<bool>,
|
||||||
#[arg(long, short = 'S')]
|
#[arg(long, short = 'S')]
|
||||||
in_structure: Option<bool>,
|
in_structure: Option<bool>,
|
||||||
|
#[arg(long, short = 'F')]
|
||||||
|
filter: Option<PathBuf>,
|
||||||
},
|
},
|
||||||
/// Find a path from one article to another.
|
/// Find a path from one article to another.
|
||||||
Path {
|
Path {
|
||||||
|
|
@ -61,7 +63,8 @@ fn main() -> io::Result<()> {
|
||||||
to,
|
to,
|
||||||
in_parens,
|
in_parens,
|
||||||
in_structure,
|
in_structure,
|
||||||
} => commands::reexport::reexport(&args.datafile, &to, in_parens, in_structure),
|
filter,
|
||||||
|
} => commands::reexport::reexport(&args.datafile, &to, in_parens, in_structure, filter),
|
||||||
Command::Path { from, to, flip } => {
|
Command::Path { from, to, flip } => {
|
||||||
if flip {
|
if flip {
|
||||||
commands::path::path(&args.datafile, &to, &from)
|
commands::path::path(&args.datafile, &to, &from)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue