Make stored data more compact

This commit is contained in:
Joscha 2022-10-01 01:46:42 +02:00
parent f6bcb39c52
commit 51096c99e1
3 changed files with 55 additions and 4 deletions

View file

@ -21,3 +21,49 @@ pub struct AdjacencyList {
pub pages: Vec<Page>, pub pages: Vec<Page>,
pub links: Vec<Link>, pub links: Vec<Link>,
} }
/// Tuple-based counterpart of `AdjacencyList` used as the serialized form.
///
/// Holds the same data as an `AdjacencyList`, but each page and link is a
/// plain tuple instead of a named-field struct. Convert with `from_alist`
/// and `to_alist`.
///
/// NOTE(review): presumably this is more compact on disk because tuple
/// elements are serialized without field names; confirm against the
/// serializer in use.
#[derive(Debug, Serialize, Deserialize)]
pub struct SlimAdjacencyList {
/// One entry per page, in order: `(link_idx, id, ns, title, redirect)`.
pages: Vec<(u32, u32, u16, String, bool)>,
/// One entry per link, in order: `(to, start, end)`.
links: Vec<(u32, u32, u32)>,
}
impl SlimAdjacencyList {
pub fn from_alist(alist: AdjacencyList) -> Self {
let pages = alist
.pages
.into_iter()
.map(|p| (p.link_idx, p.id, p.ns, p.title, p.redirect))
.collect();
let links = alist
.links
.into_iter()
.map(|l| (l.to, l.start, l.end))
.collect();
Self { pages, links }
}
pub fn to_alist(self) -> AdjacencyList {
let pages = self
.pages
.into_iter()
.map(|(link_idx, id, ns, title, redirect)| Page {
link_idx,
ns,
id,
title,
redirect,
})
.collect();
let links = self
.links
.into_iter()
.map(|(to, start, end)| Link { to, start, end })
.collect();
AdjacencyList { pages, links }
}
}

View file

@ -4,7 +4,7 @@ use std::io::{self, BufRead, BufReader};
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use serde::Deserialize; use serde::Deserialize;
use crate::data::{AdjacencyList, Link, Page}; use crate::data::{AdjacencyList, Link, Page, SlimAdjacencyList};
#[derive(Deserialize)] #[derive(Deserialize)]
struct JsonPage { struct JsonPage {
@ -179,7 +179,9 @@ pub fn ingest() -> io::Result<()> {
} }
eprintln!("EXPORT"); eprintln!("EXPORT");
ciborium::ser::into_writer(&second_stage, io::stdout()).unwrap(); let data = SlimAdjacencyList::from_alist(second_stage);
ciborium::ser::into_writer(&data, io::stdout()).unwrap();
// simd_json::to_writer(io::stdout(), &data).unwrap();
Ok(()) Ok(())
} }

View file

@ -1,10 +1,13 @@
use std::io::{self, BufReader}; use std::io::{self, BufReader};
use crate::data::AdjacencyList; use crate::data::SlimAdjacencyList;
pub fn test() -> io::Result<()> { pub fn test() -> io::Result<()> {
eprintln!("IMPORT"); eprintln!("IMPORT");
let data: AdjacencyList = ciborium::de::from_reader(BufReader::new(io::stdin())).unwrap(); let data: SlimAdjacencyList = ciborium::de::from_reader(BufReader::new(io::stdin())).unwrap();
// let data: SlimAdjacencyList =
// simd_json::serde::from_reader(BufReader::new(io::stdin())).unwrap();
let data = data.to_alist();
eprintln!("CONSISTENCY CHECK"); eprintln!("CONSISTENCY CHECK");
let range = 0..data.pages.len() as u32; let range = 0..data.pages.len() as u32;