From 3a75089e5ab46e1759e254b358eae73f6080a029 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 21 Oct 2022 20:39:53 +0200 Subject: [PATCH] Make adjacency list extensible --- brood/src/commands/ingest.rs | 26 +++++++----- brood/src/data.rs | 78 ++++++++++++++++++++---------------- 2 files changed, 60 insertions(+), 44 deletions(-) diff --git a/brood/src/commands/ingest.rs b/brood/src/commands/ingest.rs index c9508a7..f14a20e 100644 --- a/brood/src/commands/ingest.rs +++ b/brood/src/commands/ingest.rs @@ -41,9 +41,9 @@ form a proper adjacency list. struct FirstStage { /// List with page info and index into [`Self::links`]. - pages: Vec, + pages: Vec>, /// List with link info and index into [`Self::titles`]. - links: Vec, + links: Vec>, /// List with titles. titles: Vec, /// Map from normalized title to index in [`Self::titles`]. @@ -80,11 +80,17 @@ impl FirstStage { length, redirect, title, + data: (), }); } fn insert_link(&mut self, to: u32, start: u32, end: u32) { - self.links.push(Link { to, start, end }); + self.links.push(Link { + to, + start, + end, + data: (), + }); } fn import_json_page(&mut self, page: JsonPage) { @@ -125,9 +131,9 @@ impl FirstStage { struct SecondStage { /// List with page info and index into [`Self::links`]. - pages: Vec, + pages: Vec>, /// List with link info and index into [`Self::pages`]. - links: Vec, + links: Vec>, /// Map from normalized title to index in [`Self::pages`]. pages_map: FxHashMap, } @@ -141,20 +147,20 @@ impl SecondStage { } } - fn initialize_pages_map(&mut self, pages: &[Page]) { + fn initialize_pages_map(&mut self, pages: &[Page<()>]) { for (idx, page) in pages.iter().enumerate() { let title = util::normalize_link(&page.title); self.pages_map.insert(title, idx as u32); } } - fn insert_page(&mut self, page: &Page) { + fn insert_page(&mut self, page: &Page<()>) { let mut page = page.clone(); page.link_idx = self.pages.len() as u32; self.pages.push(page); } - fn insert_link(&mut self, mut link: Link, titles: &[String]) { + fn insert_link(&mut self, mut link: Link<()>, titles: &[String]) { let title = &titles[link.to as usize]; if let Some(page_idx) = self.pages_map.get(title) { link.to = *page_idx; @@ -162,7 +168,7 @@ impl SecondStage { } } - fn finalize(&mut self, pages: &[Page]) { + fn finalize(&mut self, pages: &[Page<()>]) { self.insert_page(pages.last().unwrap()); } @@ -194,7 +200,7 @@ impl SecondStage { result } - fn into_adjacency_list(self) -> AdjacencyList { + fn into_adjacency_list(self) -> AdjacencyList<(), ()> { AdjacencyList { pages: self.pages, links: self.links, diff --git a/brood/src/data.rs b/brood/src/data.rs index 3c75122..d685029 100644 --- a/brood/src/data.rs +++ b/brood/src/data.rs @@ -51,15 +51,16 @@ mod ioutil { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Page { +pub struct Page

{ pub link_idx: u32, pub id: u32, pub length: u32, pub redirect: bool, pub title: String, + pub data: P, } -impl Page { +impl Page<()> { pub fn write(&self, to: &mut W) -> io::Result<()> { ioutil::write_u32(self.link_idx, to)?; ioutil::write_u32(self.id, to)?; @@ -83,18 +84,20 @@ impl Page { length, redirect, title, + data: (), }) } } #[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct Link { +pub struct Link { pub to: u32, pub start: u32, pub end: u32, + pub data: L, } -impl Link { +impl Link<()> { pub fn write(&self, to: &mut W) -> io::Result<()> { ioutil::write_u32(self.to, to)?; ioutil::write_u32(self.start, to)?; @@ -108,42 +111,22 @@ impl Link { let start = ioutil::read_u32(from)?; let end = ioutil::read_u32(from)?; - Ok(Self { to, start, end }) + Ok(Self { + to, + start, + end, + data: (), + }) } } #[derive(Debug, Serialize, Deserialize)] -pub struct AdjacencyList { - pub pages: Vec, - pub links: Vec, +pub struct AdjacencyList { + pub pages: Vec>, + pub links: Vec>, } -impl AdjacencyList { - pub fn check_consistency(&self) { - // Check that all types are large enough - assert!(self.pages.len() <= u32::MAX as usize, "pages len"); - assert!(self.links.len() <= u32::MAX as usize, "links len"); - for page in &self.pages { - assert!(page.link_idx <= u32::MAX as u32, "page link_idx"); - assert!(page.id <= u32::MAX as u32, "page id"); - assert!(page.length <= u32::MAX as u32, "page length"); - assert!(page.title.len() <= u8::MAX as usize, "page title len"); - } - for link in &self.links { - assert!(link.to <= u32::MAX as u32, "link to"); - assert!(link.start <= u32::MAX as u32, "link start"); - assert!(link.end <= u32::MAX as u32, "link end"); - } - - // Check that all links contain valid indices - let range = 0..self.pages.len() as u32; - for link in &self.links { - if !range.contains(&link.to) { - panic!("Invalid link detected!"); - } - } - } - +impl AdjacencyList<(), ()> { pub fn write(&self, to: &mut W) -> io::Result<()> { ioutil::write_u32(self.pages.len() as u32, to)?; ioutil::write_u32(self.links.len() as u32, to)?; @@ -176,3 +159,30 @@ impl AdjacencyList { Ok(Self { pages, links }) } } + +impl AdjacencyList { + pub fn check_consistency(&self) { + // Check that all types are large enough + assert!(self.pages.len() <= u32::MAX as usize, "pages len"); + assert!(self.links.len() <= u32::MAX as usize, "links len"); + for page in &self.pages { + assert!(page.link_idx <= u32::MAX as u32, "page link_idx"); + assert!(page.id <= u32::MAX as u32, "page id"); + assert!(page.length <= u32::MAX as u32, "page length"); + assert!(page.title.len() <= u8::MAX as usize, "page title len"); + } + for link in &self.links { + assert!(link.to <= u32::MAX as u32, "link to"); + assert!(link.start <= u32::MAX as u32, "link start"); + assert!(link.end <= u32::MAX as u32, "link end"); + } + + // Check that all links contain valid indices + let range = 0..self.pages.len() as u32; + for link in &self.links { + if !range.contains(&link.to) { + panic!("Invalid link detected!"); + } + } + } +}