Make stored data more compact
This commit is contained in:
parent
f6bcb39c52
commit
51096c99e1
3 changed files with 55 additions and 4 deletions
|
|
@ -21,3 +21,49 @@ pub struct AdjacencyList {
|
|||
pub pages: Vec<Page>,
|
||||
pub links: Vec<Link>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SlimAdjacencyList {
|
||||
pages: Vec<(u32, u32, u16, String, bool)>,
|
||||
links: Vec<(u32, u32, u32)>,
|
||||
}
|
||||
|
||||
impl SlimAdjacencyList {
|
||||
pub fn from_alist(alist: AdjacencyList) -> Self {
|
||||
let pages = alist
|
||||
.pages
|
||||
.into_iter()
|
||||
.map(|p| (p.link_idx, p.id, p.ns, p.title, p.redirect))
|
||||
.collect();
|
||||
|
||||
let links = alist
|
||||
.links
|
||||
.into_iter()
|
||||
.map(|l| (l.to, l.start, l.end))
|
||||
.collect();
|
||||
|
||||
Self { pages, links }
|
||||
}
|
||||
|
||||
pub fn to_alist(self) -> AdjacencyList {
|
||||
let pages = self
|
||||
.pages
|
||||
.into_iter()
|
||||
.map(|(link_idx, id, ns, title, redirect)| Page {
|
||||
link_idx,
|
||||
ns,
|
||||
id,
|
||||
title,
|
||||
redirect,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let links = self
|
||||
.links
|
||||
.into_iter()
|
||||
.map(|(to, start, end)| Link { to, start, end })
|
||||
.collect();
|
||||
|
||||
AdjacencyList { pages, links }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use std::io::{self, BufRead, BufReader};
|
|||
use rustc_hash::FxHashMap;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::data::{AdjacencyList, Link, Page};
|
||||
use crate::data::{AdjacencyList, Link, Page, SlimAdjacencyList};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct JsonPage {
|
||||
|
|
@ -179,7 +179,9 @@ pub fn ingest() -> io::Result<()> {
|
|||
}
|
||||
|
||||
eprintln!("EXPORT");
|
||||
ciborium::ser::into_writer(&second_stage, io::stdout()).unwrap();
|
||||
let data = SlimAdjacencyList::from_alist(second_stage);
|
||||
ciborium::ser::into_writer(&data, io::stdout()).unwrap();
|
||||
// simd_json::to_writer(io::stdout(), &data).unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,13 @@
|
|||
use std::io::{self, BufReader};
|
||||
|
||||
use crate::data::AdjacencyList;
|
||||
use crate::data::SlimAdjacencyList;
|
||||
|
||||
pub fn test() -> io::Result<()> {
|
||||
eprintln!("IMPORT");
|
||||
let data: AdjacencyList = ciborium::de::from_reader(BufReader::new(io::stdin())).unwrap();
|
||||
let data: SlimAdjacencyList = ciborium::de::from_reader(BufReader::new(io::stdin())).unwrap();
|
||||
// let data: SlimAdjacencyList =
|
||||
// simd_json::serde::from_reader(BufReader::new(io::stdin())).unwrap();
|
||||
let data = data.to_alist();
|
||||
|
||||
eprintln!("CONSISTENCY CHECK");
|
||||
let range = 0..data.pages.len() as u32;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue