diff --git a/brood/src/commands/ingest.rs b/brood/src/commands/ingest.rs index cf8e60d..c4c82d5 100644 --- a/brood/src/commands/ingest.rs +++ b/brood/src/commands/ingest.rs @@ -6,7 +6,7 @@ use std::path::Path; use rustc_hash::FxHashMap; use serde::Deserialize; -use crate::data::{AdjacencyList, Link, Page}; +use crate::data::{AdjacencyList, Link, LinkInfo, Page, PageInfo}; use crate::util; #[derive(Deserialize)] @@ -61,7 +61,7 @@ impl Titles { } } -fn first_stage() -> io::Result<(AdjacencyList<(), ()>, Titles)> { +fn first_stage() -> io::Result<(AdjacencyList, Titles)> { let mut titles = Titles::default(); let mut result = AdjacencyList::default(); @@ -71,20 +71,19 @@ fn first_stage() -> io::Result<(AdjacencyList<(), ()>, Titles)> { result.pages.push(Page { link_idx: result.links.len() as u32, - id: json_page.id, - length: json_page.length, - redirect: json_page.redirect.is_some(), - title: json_page.title, - data: (), + data: PageInfo { + id: json_page.id, + length: json_page.length, + redirect: json_page.redirect.is_some(), + title: json_page.title, + }, }); for (to, start, end) in json_page.links { let to = titles.insert(util::normalize_link(&to)); result.links.push(Link { to, - start, - end, - data: (), + data: LinkInfo { start, end }, }); } @@ -100,23 +99,29 @@ fn first_stage() -> io::Result<(AdjacencyList<(), ()>, Titles)> { result.pages.push(Page { link_idx: result.links.len() as u32, - id: 0, - length: 0, - redirect: false, - title: "Sentinel page at the end of all pages, Q2AKO3OYzyitmCJURghJ".to_string(), - data: (), + data: PageInfo { + id: 0, + length: 0, + redirect: false, + title: "Sentinel page at the end of all pages, Q2AKO3OYzyitmCJURghJ".to_string(), + }, }); Ok((result, titles)) } /// Create map from normalized title to index in pages. 
-fn initialize_pages_map(pages: &[Page<()>]) -> FxHashMap<String, u32> { +fn initialize_pages_map(pages: &[Page<PageInfo>]) -> FxHashMap<String, u32> { let mut result = FxHashMap::default(); for (i, p) in pages.iter().enumerate() { - match result.entry(util::normalize_link(&p.title)) { + match result.entry(util::normalize_link(&p.data.title)) { Entry::Occupied(entry) => { - eprintln!("{:?} already exists at index {}", p.title, entry.get()); + eprintln!( + "{:?} already exists at index {} as {:?}", + p.data.title, + entry.get(), + util::normalize_link(&p.data.title) + ); } Entry::Vacant(entry) => { entry.insert(i as u32); @@ -126,7 +131,10 @@ fn initialize_pages_map(pages: &[Page<()>]) -> FxHashMap<String, u32> { result } -fn second_stage(first_stage: &AdjacencyList<(), ()>, titles: &Titles) -> AdjacencyList<(), ()> { +fn second_stage( + first_stage: &AdjacencyList<PageInfo, LinkInfo>, + titles: &Titles, +) -> AdjacencyList<PageInfo, LinkInfo> { let pages_map = initialize_pages_map(&first_stage.pages); let mut result = AdjacencyList::default(); diff --git a/brood/src/commands/path.rs b/brood/src/commands/path.rs index d536108..e137d4b 100644 --- a/brood/src/commands/path.rs +++ b/brood/src/commands/path.rs @@ -8,7 +8,7 @@ use crate::util; pub fn path(datafile: &Path, from: &str, to: &str) -> io::Result<()> { eprintln!(">> Import"); let mut databuf = BufReader::new(File::open(datafile)?); - let mut data = AdjacencyList::read(&mut databuf)?.change_page_data(f32::INFINITY); + let data = AdjacencyList::read(&mut databuf)?; eprintln!(">> Locate from and to"); let from = util::normalize_link(from); @@ -17,15 +17,15 @@ pub fn path(datafile: &Path, from: &str, to: &str) -> io::Result<()> { .pages .iter() .enumerate() - .filter(|(_, p)| !p.redirect) - .find(|(_, p)| util::normalize_link(&p.title) == from) + .filter(|(_, p)| !p.data.redirect) + .find(|(_, p)| util::normalize_link(&p.data.title) == from) .unwrap_or_else(|| panic!("no article called {from}")); let (to_i, to_p) = data .pages .iter() .enumerate() - .filter(|(_, p)| !p.redirect) - .find(|(_, p)| 
util::normalize_link(&p.title) == to) + .filter(|(_, p)| !p.data.redirect) + .find(|(_, p)| util::normalize_link(&p.data.title) == to) .unwrap_or_else(|| panic!("no article called {to}")); dbg!(from_i, from_p, to_i, to_p); diff --git a/brood/src/data.rs b/brood/src/data.rs index 294d0dd..0520d54 100644 --- a/brood/src/data.rs +++ b/brood/src/data.rs @@ -1,7 +1,5 @@ use std::io::{self, Read, Write}; -use serde::{Deserialize, Serialize}; - mod ioutil { use std::io::{self, Read, Write}; @@ -50,23 +48,27 @@ mod ioutil { } } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Page
<P>
{ - pub link_idx: u32, +#[derive(Debug, Clone)] +pub struct PageInfo { pub id: u32, pub length: u32, pub redirect: bool, pub title: String, +} + +#[derive(Debug, Clone, Copy)] +pub struct Page
<P>
{ + pub link_idx: u32, pub data: P, } -impl Page<()> { +impl Page { pub fn write(&self, to: &mut W) -> io::Result<()> { ioutil::write_u32(self.link_idx, to)?; - ioutil::write_u32(self.id, to)?; - ioutil::write_u32(self.length, to)?; - ioutil::write_u8(if self.redirect { 1 } else { 0 }, to)?; - ioutil::write_str(&self.title, to)?; + ioutil::write_u32(self.data.id, to)?; + ioutil::write_u32(self.data.length, to)?; + ioutil::write_u8(if self.data.redirect { 1 } else { 0 }, to)?; + ioutil::write_str(&self.data.title, to)?; Ok(()) } @@ -80,41 +82,42 @@ impl Page<()> { Ok(Self { link_idx, - id, - length, - redirect, - title, - data: (), + data: PageInfo { + id, + length, + redirect, + title, + }, }) } } impl
<P>
Page
<P>
{ - pub fn change_data(self, data: P2) -> Page { + pub fn change_data(self, f: &impl Fn(P) -> P2) -> Page { Page { link_idx: self.link_idx, - id: self.id, - length: self.length, - redirect: self.redirect, - title: self.title, - data, + data: f(self.data), } } } -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct Link { - pub to: u32, +#[derive(Debug, Clone, Copy)] +pub struct LinkInfo { pub start: u32, pub end: u32, +} + +#[derive(Debug, Clone, Copy)] +pub struct Link { + pub to: u32, pub data: L, } -impl Link<()> { +impl Link { pub fn write(&self, to: &mut W) -> io::Result<()> { ioutil::write_u32(self.to, to)?; - ioutil::write_u32(self.start, to)?; - ioutil::write_u32(self.end, to)?; + ioutil::write_u32(self.data.start, to)?; + ioutil::write_u32(self.data.end, to)?; Ok(()) } @@ -126,31 +129,35 @@ impl Link<()> { Ok(Self { to, - start, - end, - data: (), + data: LinkInfo { start, end }, }) } } -impl
<L>
Link
<L>
{ - pub fn change_data(self, data: P2) -> Link { +impl Link { + pub fn change_data(self, f: &impl Fn(L) -> L2) -> Link { Link { to: self.to, - start: self.start, - end: self.end, - data, + data: f(self.data), } } } -#[derive(Debug, Default, Serialize, Deserialize)] pub struct AdjacencyList { pub pages: Vec>, pub links: Vec>, } -impl AdjacencyList<(), ()> { +impl Default for AdjacencyList { + fn default() -> Self { + Self { + pages: Default::default(), + links: Default::default(), + } + } +} + +impl AdjacencyList { pub fn write(&self, to: &mut W) -> io::Result<()> { ioutil::write_u32(self.pages.len() as u32, to)?; ioutil::write_u32(self.links.len() as u32, to)?; @@ -182,23 +189,21 @@ impl AdjacencyList<(), ()> { Ok(Self { pages, links }) } -} -impl AdjacencyList { pub fn check_consistency(&self) { // Check that all types are large enough assert!(self.pages.len() <= u32::MAX as usize, "pages len"); assert!(self.links.len() <= u32::MAX as usize, "links len"); for page in &self.pages { assert!(page.link_idx <= u32::MAX as u32, "page link_idx"); - assert!(page.id <= u32::MAX as u32, "page id"); - assert!(page.length <= u32::MAX as u32, "page length"); - assert!(page.title.len() <= u8::MAX as usize, "page title len"); + assert!(page.data.id <= u32::MAX as u32, "page id"); + assert!(page.data.length <= u32::MAX as u32, "page length"); + assert!(page.data.title.len() <= u8::MAX as usize, "page title len"); } for link in &self.links { assert!(link.to <= u32::MAX as u32, "link to"); - assert!(link.start <= u32::MAX as u32, "link start"); - assert!(link.end <= u32::MAX as u32, "link end"); + assert!(link.data.start <= u32::MAX as u32, "link start"); + assert!(link.data.end <= u32::MAX as u32, "link end"); } // Check that all links contain valid indices @@ -209,12 +214,14 @@ impl AdjacencyList { } } } +} - pub fn change_page_data(self, data: P2) -> AdjacencyList { +impl AdjacencyList { + pub fn change_page_data(self, page_f: &impl Fn(P) -> P2) -> AdjacencyList { let pages = 
self .pages .into_iter() - .map(|p| p.change_data(data.clone())) + .map(|p| p.change_data(page_f)) .collect::<Vec<_>>(); AdjacencyList { @@ -223,11 +230,11 @@ impl<P, L> AdjacencyList<P, L> { - pub fn change_link_data<L2: Clone>(self, data: L2) -> AdjacencyList<P, L2> { + pub fn change_link_data<L2>(self, link_f: &impl Fn(L) -> L2) -> AdjacencyList<P, L2> { let links = self .links .into_iter() - .map(|l| l.change_data(data.clone())) + .map(|l| l.change_data(link_f)) .collect::<Vec<_>>(); AdjacencyList {