diff --git a/brood/src/data/adjacency_list.rs b/brood/src/data/adjacency_list.rs deleted file mode 100644 index 2de1631..0000000 --- a/brood/src/data/adjacency_list.rs +++ /dev/null @@ -1,196 +0,0 @@ -use std::ops::Range; - -use super::info::{LinkInfo, PageInfo}; - -#[derive(Debug, Clone, Copy)] -pub struct Page

{ - /// Index of the first link belonging to this page. - pub start: u32, - pub data: P, -} - -impl

Page

{ - pub fn change_data(self, f: &impl Fn(P) -> P2) -> Page { - Page { - start: self.start, - data: f(self.data), - } - } -} - -#[derive(Debug, Clone, Copy)] -pub struct Link { - /// Index of the page this link points to. - pub to: u32, - pub data: L, -} - -impl Link { - pub fn change_data(self, f: &impl Fn(L) -> L2) -> Link { - Link { - to: self.to, - data: f(self.data), - } - } - - pub fn change_data_with_page(self, page: &P, f: &impl Fn(&P, L) -> L2) -> Link { - Link { - to: self.to, - data: f(page, self.data), - } - } -} - -pub struct AdjacencyList { - pub pages: Vec>, - pub links: Vec>, -} - -impl Default for AdjacencyList { - fn default() -> Self { - Self { - pages: vec![], - links: vec![], - } - } -} - -impl AdjacencyList { - pub fn push_page(&mut self, data: P) { - self.pages.push(Page { - start: self.links.len() as u32, - data, - }); - } - - pub fn push_link(&mut self, to: u32, data: L) { - self.links.push(Link { to, data }) - } - - pub fn page(&self, page_idx: u32) -> &Page

{ - &self.pages[page_idx as usize] - } - - pub fn page_mut(&mut self, page_idx: u32) -> &mut Page

{ - &mut self.pages[page_idx as usize] - } - - pub fn pages(&self) -> impl Iterator)> { - self.pages.iter().enumerate().map(|(i, p)| (i as u32, p)) - } - - pub fn link(&self, link_idx: u32) -> &Link { - &self.links[link_idx as usize] - } - - pub fn link_mut(&mut self, link_idx: u32) -> &mut Link { - &mut self.links[link_idx as usize] - } - - pub fn link_range(&self, page_idx: u32) -> Range { - let start_idx = self.pages[page_idx as usize].start; - let end_idx = match self.pages.get(page_idx as usize + 1) { - Some(page) => page.start, - None => self.links.len() as u32, - }; - start_idx..end_idx - } - - pub fn link_redirect(&self, page_idx: u32) -> Option { - let range = self.link_range(page_idx); - if range.is_empty() { - None - } else { - Some(range.start) - } - } - - pub fn links(&self, page_idx: u32) -> impl Iterator)> { - self.link_range(page_idx).map(|i| (i, self.link(i))) - } - - pub fn change_page_data(self, page_f: impl Fn(P) -> P2) -> AdjacencyList { - let pages = self - .pages - .into_iter() - .map(|p| p.change_data(&page_f)) - .collect::>(); - - AdjacencyList { - pages, - links: self.links, - } - } - - pub fn change_link_data(self, link_f: impl Fn(L) -> L2) -> AdjacencyList { - let links = self - .links - .into_iter() - .map(|l| l.change_data(&link_f)) - .collect::>(); - - AdjacencyList { - pages: self.pages, - links, - } - } - - pub fn change_link_data_with_page( - self, - link_f: impl Fn(&P, L) -> L2, - ) -> AdjacencyList { - let mut pages = self.pages.iter().peekable(); - let Some(mut cur_page) = pages.next() else { - // The list is empty, nothing to do - return AdjacencyList::default(); - }; - - let mut links = vec![]; - - for (i, link) in self.links.into_iter().enumerate() { - if let Some(page) = pages.peek() { - if i >= page.start as usize { - cur_page = page; - pages.next(); - } - } - - links.push(link.change_data_with_page(&cur_page.data, &link_f)); - } - - AdjacencyList { - pages: self.pages, - links, - } - } -} - -impl AdjacencyList { - pub fn check_consistency(&self) { - // Check that all types are large enough - assert!(self.pages.len() < u32::MAX as usize, "too many pages"); - assert!(self.links.len() < u32::MAX as usize, "too many links"); - for page in &self.pages { - assert!( - page.data.title.len() <= u8::MAX as usize, - "page title too long" - ); - } - - // Check that all links contain valid indices. Links must not link to - // the sentinel page. - let range = 0..self.pages.len() as u32; - for link in &self.links { - assert!(range.contains(&link.to), "invalid link"); - } - - // Check that all redirect pages have at most one link - for (page_idx, page) in self.pages.iter().enumerate() { - if page.data.redirect { - let range = self.link_range(page_idx as u32); - let amount = range.end - range.start; - assert!(amount <= 1, "too many redirect links"); - } - } - } -} diff --git a/brood/src/data/info.rs b/brood/src/data/info.rs deleted file mode 100644 index dad04d4..0000000 --- a/brood/src/data/info.rs +++ /dev/null @@ -1,24 +0,0 @@ -#[derive(Debug, Clone)] -pub struct PageInfo { - pub id: u32, - pub title: String, - pub length: u32, - pub redirect: bool, -} - -#[derive(Debug, Default, Clone, Copy)] -pub struct LinkInfo { - pub start: u32, - pub len: u32, - pub flags: u8, -} - -impl LinkInfo { - pub fn in_parens(self) -> bool { - self.flags & 0b1 != 0 - } - - pub fn in_structure(self) -> bool { - self.flags & 0b10 != 0 - } -} diff --git a/brood/src/data/store.rs b/brood/src/data/store.rs deleted file mode 100644 index 06a35eb..0000000 --- a/brood/src/data/store.rs +++ /dev/null @@ -1,160 +0,0 @@ -use std::io::{self, Read, Write}; - -use crate::graph::{EdgeIdx, Graph, NodeIdx}; - -use super::{ - adjacency_list::{AdjacencyList, Link, Page}, - info::{LinkInfo, PageInfo}, -}; - -fn write_u8(n: u8, to: &mut W) -> io::Result<()> { - to.write_all(&n.to_le_bytes()) -} - -fn read_u8(from: &mut R) -> io::Result { - let mut buf = [0_u8; 1]; - from.read_exact(&mut buf)?; - Ok(u8::from_le_bytes(buf)) -} - -fn write_u16(n: u16, to: &mut W) -> io::Result<()> { - to.write_all(&n.to_le_bytes()) -} - -fn read_u16(from: &mut R) -> io::Result { - let mut buf = [0_u8; 2]; - from.read_exact(&mut buf)?; - Ok(u16::from_le_bytes(buf)) -} - -fn write_u32(n: u32, to: &mut W) -> io::Result<()> { - to.write_all(&n.to_le_bytes()) -} - -fn read_u32(from: &mut R) -> io::Result { - let mut buf = [0_u8; 4]; - from.read_exact(&mut buf)?; - Ok(u32::from_le_bytes(buf)) -} - -fn write_str(s: &str, to: &mut W) -> io::Result<()> { - assert!(s.len() <= u16::MAX as usize); - write_u16(s.len() as u16, to)?; - to.write_all(s.as_bytes())?; - Ok(()) -} - -fn read_str(from: &mut R) -> io::Result { - let len = read_u16(from)? as usize; - let mut buf = vec![0_u8; len]; - from.read_exact(&mut buf)?; - Ok(String::from_utf8(buf).unwrap()) -} - -fn write_page(page: &Page, to: &mut W) -> io::Result<()> { - write_u32(page.start, to)?; - write_u32(page.data.id, to)?; - write_u32(page.data.length, to)?; - write_u8(if page.data.redirect { 1 } else { 0 }, to)?; - write_str(&page.data.title, to)?; - - Ok(()) -} - -pub fn read_page(from: &mut R) -> io::Result> { - let start_link_idx = read_u32(from)?; - let id = read_u32(from)?; - let length = read_u32(from)?; - let redirect = read_u8(from)? != 0; - let title = read_str(from)?; - - Ok(Page { - start: start_link_idx, - data: PageInfo { - id, - length, - redirect, - title, - }, - }) -} - -fn write_link(link: &Link, to: &mut W) -> io::Result<()> { - write_u32(link.to, to)?; - write_u32(link.data.start, to)?; - write_u32(link.data.len, to)?; - write_u8(link.data.flags, to)?; - - Ok(()) -} - -fn read_link(from: &mut R) -> io::Result> { - let to_page_idx = read_u32(from)?; - let start = read_u32(from)?; - let len = read_u32(from)?; - let flags = read_u8(from)?; - - Ok(Link { - to: to_page_idx, - data: LinkInfo { start, len, flags }, - }) -} - -pub fn write_adjacency_list( - al: &AdjacencyList, - to: &mut W, -) -> io::Result<()> { - write_u32(al.pages.len() as u32, to)?; - write_u32(al.links.len() as u32, to)?; - - for page in &al.pages { - write_page(page, to)?; - } - - for link in &al.links { - write_link(link, to)?; - } - - Ok(()) -} - -pub fn read_adjacency_list(from: &mut R) -> io::Result> { - let n_pages = read_u32(from)?; - let n_links = read_u32(from)?; - - let mut pages = vec![]; - for _ in 0..n_pages { - pages.push(read_page(from)?); - } - - let mut links = vec![]; - for _ in 0..n_links { - links.push(read_link(from)?); - } - - Ok(AdjacencyList { pages, links }) -} - -pub fn read_graph(from: &mut impl Read) -> io::Result<(Vec, Vec, Graph)> { - let n_pages = read_u32(from)?; - let n_links = read_u32(from)?; - - let mut pages = Vec::with_capacity(n_pages as usize); - let mut links = Vec::with_capacity(n_links as usize); - let mut graph = Graph::with_capacity(n_pages as usize, n_links as usize); - - for _ in 0..n_pages { - let page = read_page(from)?; - graph.nodes.push(EdgeIdx(page.start)); - pages.push(page.data); - } - - for _ in 0..n_links { - let link = read_link(from)?; - graph.edges.push(NodeIdx(link.to)); - links.push(link.data); - } - - graph.check_consistency(); - Ok((pages, links, graph)) -}