diff --git a/brood/src/data.rs b/brood/src/data.rs index afa904f..560f383 100644 --- a/brood/src/data.rs +++ b/brood/src/data.rs @@ -21,3 +21,49 @@ pub struct AdjacencyList { pub pages: Vec, pub links: Vec, } + +#[derive(Debug, Serialize, Deserialize)] +pub struct SlimAdjacencyList { + pages: Vec<(u32, u32, u16, String, bool)>, + links: Vec<(u32, u32, u32)>, +} + +impl SlimAdjacencyList { + pub fn from_alist(alist: AdjacencyList) -> Self { + let pages = alist + .pages + .into_iter() + .map(|p| (p.link_idx, p.id, p.ns, p.title, p.redirect)) + .collect(); + + let links = alist + .links + .into_iter() + .map(|l| (l.to, l.start, l.end)) + .collect(); + + Self { pages, links } + } + + pub fn to_alist(self) -> AdjacencyList { + let pages = self + .pages + .into_iter() + .map(|(link_idx, id, ns, title, redirect)| Page { + link_idx, + ns, + id, + title, + redirect, + }) + .collect(); + + let links = self + .links + .into_iter() + .map(|(to, start, end)| Link { to, start, end }) + .collect(); + + AdjacencyList { pages, links } + } +} diff --git a/brood/src/ingest.rs b/brood/src/ingest.rs index b31e872..d084a91 100644 --- a/brood/src/ingest.rs +++ b/brood/src/ingest.rs @@ -4,7 +4,7 @@ use std::io::{self, BufRead, BufReader}; use rustc_hash::FxHashMap; use serde::Deserialize; -use crate::data::{AdjacencyList, Link, Page}; +use crate::data::{AdjacencyList, Link, Page, SlimAdjacencyList}; #[derive(Deserialize)] struct JsonPage { @@ -179,7 +179,9 @@ pub fn ingest() -> io::Result<()> { } eprintln!("EXPORT"); - ciborium::ser::into_writer(&second_stage, io::stdout()).unwrap(); + let data = SlimAdjacencyList::from_alist(second_stage); + ciborium::ser::into_writer(&data, io::stdout()).unwrap(); + // simd_json::to_writer(io::stdout(), &data).unwrap(); Ok(()) } diff --git a/brood/src/test.rs b/brood/src/test.rs index bdb46a1..5ebeabf 100644 --- a/brood/src/test.rs +++ b/brood/src/test.rs @@ -1,10 +1,13 @@ use std::io::{self, BufReader}; -use crate::data::AdjacencyList; +use crate::data::SlimAdjacencyList; pub fn test() -> io::Result<()> { eprintln!("IMPORT"); - let data: AdjacencyList = ciborium::de::from_reader(BufReader::new(io::stdin())).unwrap(); + let data: SlimAdjacencyList = ciborium::de::from_reader(BufReader::new(io::stdin())).unwrap(); + // let data: SlimAdjacencyList = + // simd_json::serde::from_reader(BufReader::new(io::stdin())).unwrap(); + let data = data.to_alist(); eprintln!("CONSISTENCY CHECK"); let range = 0..data.pages.len() as u32;