use std::{ fs::File, io::{self, BufReader, BufWriter, Read, Write}, path::Path, }; use crate::graph::{EdgeIdx, Graph, NodeIdx}; #[derive(Debug, Clone)] pub struct Page { pub id: u32, pub title: String, pub length: u32, pub redirect: bool, } #[derive(Debug, Default, Clone, Copy)] pub struct Link { pub start: u32, pub len: u32, pub flags: u8, } impl Link { pub fn in_parens(self) -> bool { self.flags & 0b1 != 0 } pub fn in_structure(self) -> bool { self.flags & 0b10 != 0 } } fn write_u8(w: &mut impl Write, n: u8) -> io::Result<()> { w.write_all(&n.to_le_bytes()) } fn read_u8(r: &mut impl Read) -> io::Result { let mut buf = [0_u8; 1]; r.read_exact(&mut buf)?; Ok(u8::from_le_bytes(buf)) } fn write_u16(w: &mut impl Write, n: u16) -> io::Result<()> { w.write_all(&n.to_le_bytes()) } fn read_u16(r: &mut impl Read) -> io::Result { let mut buf = [0_u8; 2]; r.read_exact(&mut buf)?; Ok(u16::from_le_bytes(buf)) } fn write_u32(w: &mut impl Write, n: u32) -> io::Result<()> { w.write_all(&n.to_le_bytes()) } fn read_u32(r: &mut impl Read) -> io::Result { let mut buf = [0_u8; 4]; r.read_exact(&mut buf)?; Ok(u32::from_le_bytes(buf)) } fn write_str(w: &mut impl Write, s: &str) -> io::Result<()> { assert!(s.len() <= u16::MAX as usize); write_u16(w, s.len() as u16)?; w.write_all(s.as_bytes())?; Ok(()) } fn read_str(r: &mut impl Read) -> io::Result { let len = read_u16(r)? as usize; let mut buf = vec![0_u8; len]; r.read_exact(&mut buf)?; Ok(String::from_utf8(buf).unwrap()) } fn write_page(w: &mut impl Write, page: &Page) -> io::Result<()> { write_u32(w, page.id)?; write_u32(w, page.length)?; write_u8(w, if page.redirect { 1 } else { 0 })?; write_str(w, &page.title)?; Ok(()) } pub fn read_page(r: &mut impl Read) -> io::Result { Ok(Page { id: read_u32(r)?, length: read_u32(r)?, redirect: read_u8(r)? != 0, title: read_str(r)?, }) } fn write_link(w: &mut impl Write, link: &Link) -> io::Result<()> { write_u32(w, link.start)?; write_u32(w, link.len)?; write_u8(w, link.flags)?; Ok(()) } fn read_link(r: &mut impl Read) -> io::Result { Ok(Link { start: read_u32(r)?, len: read_u32(r)?, flags: read_u8(r)?, }) } #[derive(Default)] pub struct Data { pub pages: Vec, pub links: Vec, pub graph: Graph, } impl Data { pub fn new() -> Self { Self::default() } pub fn with_capacity(pages: usize, links: usize) -> Self { Self { pages: Vec::with_capacity(pages), links: Vec::with_capacity(links), graph: Graph::with_capacity(pages, links), } } fn write(&self, w: &mut impl Write) -> io::Result<()> { assert!(self.pages.len() < u32::MAX as usize); assert!(self.links.len() < u32::MAX as usize); assert_eq!(self.pages.len(), self.graph.nodes.len()); assert_eq!(self.links.len(), self.graph.edges.len()); write_u32(w, self.pages.len() as u32)?; write_u32(w, self.links.len() as u32)?; for page in &self.pages { write_page(w, page)?; } for link in &self.links { write_link(w, link)?; } for node in &self.graph.nodes { write_u32(w, node.0)?; } for edge in &self.graph.edges { write_u32(w, edge.0)?; } Ok(()) } fn read(r: &mut impl Read) -> io::Result { let n_pages = read_u32(r)?; let n_links = read_u32(r)?; let mut result = Self::with_capacity(n_pages as usize, n_links as usize); for _ in 0..n_pages { result.pages.push(read_page(r)?); } for _ in 0..n_links { result.links.push(read_link(r)?); } for _ in 0..n_pages { result.graph.nodes.push(EdgeIdx(read_u32(r)?)); } for _ in 0..n_links { result.graph.edges.push(NodeIdx(read_u32(r)?)); } assert_eq!(result.pages.len(), result.graph.nodes.len()); assert_eq!(result.links.len(), result.graph.edges.len()); result.graph.check_consistency(); Ok(result) } pub fn write_to_file(&self, path: &Path) -> io::Result<()> { let mut file = BufWriter::new(File::create(path)?); self.write(&mut file) } pub fn read_from_file(path: &Path) -> io::Result { let mut file = BufReader::new(File::open(path)?); Self::read(&mut file) } pub fn check_consistency(&self) { assert_eq!( self.pages.len(), self.graph.nodes.len(), "inconsistent number of pages" ); assert_eq!( self.links.len(), self.graph.edges.len(), "inconsistent number of links" ); self.graph.check_consistency(); } pub fn redirect_target(&self, node: NodeIdx) -> Option { if !self.pages[node.usize()].redirect { return None; } self.graph.edge_slice(node).first().copied() } }