Make stored data more compact
This commit is contained in:
parent
f6bcb39c52
commit
51096c99e1
3 changed files with 55 additions and 4 deletions
|
|
@ -21,3 +21,49 @@ pub struct AdjacencyList {
|
||||||
pub pages: Vec<Page>,
|
pub pages: Vec<Page>,
|
||||||
pub links: Vec<Link>,
|
pub links: Vec<Link>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
pub struct SlimAdjacencyList {
|
||||||
|
pages: Vec<(u32, u32, u16, String, bool)>,
|
||||||
|
links: Vec<(u32, u32, u32)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SlimAdjacencyList {
|
||||||
|
pub fn from_alist(alist: AdjacencyList) -> Self {
|
||||||
|
let pages = alist
|
||||||
|
.pages
|
||||||
|
.into_iter()
|
||||||
|
.map(|p| (p.link_idx, p.id, p.ns, p.title, p.redirect))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let links = alist
|
||||||
|
.links
|
||||||
|
.into_iter()
|
||||||
|
.map(|l| (l.to, l.start, l.end))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Self { pages, links }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_alist(self) -> AdjacencyList {
|
||||||
|
let pages = self
|
||||||
|
.pages
|
||||||
|
.into_iter()
|
||||||
|
.map(|(link_idx, id, ns, title, redirect)| Page {
|
||||||
|
link_idx,
|
||||||
|
ns,
|
||||||
|
id,
|
||||||
|
title,
|
||||||
|
redirect,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let links = self
|
||||||
|
.links
|
||||||
|
.into_iter()
|
||||||
|
.map(|(to, start, end)| Link { to, start, end })
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
AdjacencyList { pages, links }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ use std::io::{self, BufRead, BufReader};
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::FxHashMap;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
use crate::data::{AdjacencyList, Link, Page};
|
use crate::data::{AdjacencyList, Link, Page, SlimAdjacencyList};
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct JsonPage {
|
struct JsonPage {
|
||||||
|
|
@ -179,7 +179,9 @@ pub fn ingest() -> io::Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("EXPORT");
|
eprintln!("EXPORT");
|
||||||
ciborium::ser::into_writer(&second_stage, io::stdout()).unwrap();
|
let data = SlimAdjacencyList::from_alist(second_stage);
|
||||||
|
ciborium::ser::into_writer(&data, io::stdout()).unwrap();
|
||||||
|
// simd_json::to_writer(io::stdout(), &data).unwrap();
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,13 @@
|
||||||
use std::io::{self, BufReader};
|
use std::io::{self, BufReader};
|
||||||
|
|
||||||
use crate::data::AdjacencyList;
|
use crate::data::SlimAdjacencyList;
|
||||||
|
|
||||||
pub fn test() -> io::Result<()> {
|
pub fn test() -> io::Result<()> {
|
||||||
eprintln!("IMPORT");
|
eprintln!("IMPORT");
|
||||||
let data: AdjacencyList = ciborium::de::from_reader(BufReader::new(io::stdin())).unwrap();
|
let data: SlimAdjacencyList = ciborium::de::from_reader(BufReader::new(io::stdin())).unwrap();
|
||||||
|
// let data: SlimAdjacencyList =
|
||||||
|
// simd_json::serde::from_reader(BufReader::new(io::stdin())).unwrap();
|
||||||
|
let data = data.to_alist();
|
||||||
|
|
||||||
eprintln!("CONSISTENCY CHECK");
|
eprintln!("CONSISTENCY CHECK");
|
||||||
let range = 0..data.pages.len() as u32;
|
let range = 0..data.pages.len() as u32;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue