Refactor export and add page length

This commit is contained in:
Joscha 2022-10-03 22:14:58 +02:00
parent d910047b48
commit f71092058b
3 changed files with 123 additions and 74 deletions

View file

@ -2,12 +2,89 @@ use std::io::{self, Read, Write};
use serde::{Deserialize, Serialize};
/// Helpers for reading and writing fixed-width little-endian integers and
/// length-prefixed UTF-8 strings on top of [`std::io::Read`] / [`std::io::Write`].
///
/// Every reader uses `read_exact`, so a short input surfaces as an
/// `io::Error` rather than a partially filled value.
mod ioutil {
    use std::io::{self, Read, Write};

    /// Writes `n` as a single byte.
    pub fn write_u8<W: Write>(n: u8, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads a single byte.
    pub fn read_u8<R: Read>(from: &mut R) -> io::Result<u8> {
        let mut buf = [0_u8; 1];
        from.read_exact(&mut buf)?;
        Ok(u8::from_le_bytes(buf))
    }

    /// Writes `n` as two little-endian bytes.
    pub fn write_u16<W: Write>(n: u16, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads two bytes and decodes them as a little-endian `u16`.
    pub fn read_u16<R: Read>(from: &mut R) -> io::Result<u16> {
        let mut buf = [0_u8; 2];
        from.read_exact(&mut buf)?;
        Ok(u16::from_le_bytes(buf))
    }

    /// Writes `n` as four little-endian bytes.
    pub fn write_u32<W: Write>(n: u32, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads four bytes and decodes them as a little-endian `u32`.
    pub fn read_u32<R: Read>(from: &mut R) -> io::Result<u32> {
        let mut buf = [0_u8; 4];
        from.read_exact(&mut buf)?;
        Ok(u32::from_le_bytes(buf))
    }

    /// Writes `s` as a `u16` byte length followed by its UTF-8 bytes.
    ///
    /// # Errors
    ///
    /// Returns `io::ErrorKind::InvalidInput` (and writes nothing) when `s` is
    /// longer than `u16::MAX` bytes, because the length prefix could not
    /// represent it. Any error from the underlying writer is propagated.
    pub fn write_str<W: Write>(s: &str, to: &mut W) -> io::Result<()> {
        // Checked before anything is written so a rejected string leaves the
        // output untouched.
        if s.len() > u16::MAX as usize {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "string is longer than u16::MAX bytes",
            ));
        }
        write_u16(s.len() as u16, to)?;
        to.write_all(s.as_bytes())
    }

    /// Reads a `u16` byte length followed by that many bytes and decodes them
    /// as UTF-8.
    ///
    /// # Errors
    ///
    /// Returns `io::ErrorKind::InvalidData` when the bytes are not valid
    /// UTF-8. Any error from the underlying reader (including a short read)
    /// is propagated.
    pub fn read_str<R: Read>(from: &mut R) -> io::Result<String> {
        let len = read_u16(from)? as usize;
        let mut buf = vec![0_u8; len];
        from.read_exact(&mut buf)?;
        // The bytes come from the input, so bad UTF-8 is a data error to
        // report, not an invariant violation to panic on.
        String::from_utf8(buf).map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
    }
}
/// A single page record.
///
/// The fields are declared in the same order in which `Page::write` and
/// `Page::read` encode them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Page {
    /// NOTE(review): presumably the index of this page's first entry in the
    /// link list — confirm against the code that builds the adjacency list.
    pub link_idx: u32,
    /// Numeric identifier of the page.
    pub id: u32,
    /// Length of the page; NOTE(review): the unit (bytes, characters, ...) is
    /// not visible here — confirm against the exporter.
    pub length: u32,
    /// Whether this page is a redirect.
    pub redirect: bool,
    /// Title of the page.
    pub title: String,
}
impl Page {
    /// Encodes this page onto `to`.
    ///
    /// Layout, in order: `link_idx`, `id` and `length` as `u32`, the
    /// `redirect` flag as one byte (`1` or `0`), then `title` via
    /// `ioutil::write_str`.
    pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
        let redirect_flag = u8::from(self.redirect);

        ioutil::write_u32(self.link_idx, to)?;
        ioutil::write_u32(self.id, to)?;
        ioutil::write_u32(self.length, to)?;
        ioutil::write_u8(redirect_flag, to)?;
        ioutil::write_str(&self.title, to)
    }

    /// Decodes a page from `from`, consuming the fields in the order that
    /// `write` emits them. Any non-zero redirect byte counts as `true`.
    pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
        // Struct-literal fields are evaluated in the order written, so this
        // order is the wire order and must not be rearranged.
        Ok(Self {
            link_idx: ioutil::read_u32(from)?,
            id: ioutil::read_u32(from)?,
            length: ioutil::read_u32(from)?,
            redirect: ioutil::read_u8(from)? != 0,
            title: ioutil::read_str(from)?,
        })
    }
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
@ -17,6 +94,24 @@ pub struct Link {
pub end: u32,
}
impl Link {
    /// Encodes this link onto `to` as three consecutive `u32` values:
    /// `to`, `start`, `end`.
    pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
        for value in [self.to, self.start, self.end] {
            ioutil::write_u32(value, to)?;
        }
        Ok(())
    }

    /// Decodes a link from `from`, reading `to`, `start` and `end` in that
    /// order.
    pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
        // Struct-literal fields are evaluated in the order written, so this
        // order is the wire order and must not be rearranged.
        Ok(Self {
            to: ioutil::read_u32(from)?,
            start: ioutil::read_u32(from)?,
            end: ioutil::read_u32(from)?,
        })
    }
}
#[derive(Debug, Serialize, Deserialize)]
pub struct AdjacencyList {
pub pages: Vec<Page>,
@ -33,87 +128,35 @@ impl AdjacencyList {
}
}
pub fn write<W: Write>(&self, mut to: W) -> io::Result<()> {
let n_pages: u32 = self.pages.len() as u32;
to.write_all(&n_pages.to_le_bytes())?;
let n_links: u32 = self.links.len() as u32;
to.write_all(&n_links.to_le_bytes())?;
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
ioutil::write_u32(self.pages.len() as u32, to)?;
ioutil::write_u32(self.links.len() as u32, to)?;
for page in &self.pages {
to.write_all(&page.link_idx.to_le_bytes())?;
to.write_all(&page.id.to_le_bytes())?;
to.write_all(&[if page.redirect { 1 } else { 0 }])?;
let title_len: u16 = page.title.len() as u16;
to.write_all(&title_len.to_le_bytes())?;
to.write_all(page.title.as_bytes())?;
page.write(to)?;
}
for link in &self.links {
to.write_all(&link.to.to_le_bytes())?;
to.write_all(&link.start.to_le_bytes())?;
to.write_all(&link.end.to_le_bytes())?;
link.write(to)?;
}
Ok(())
}
pub fn read<R: Read>(mut from: R) -> io::Result<Self> {
let mut result = Self {
pages: vec![],
links: vec![],
};
let mut u8_buf = [0_u8; 1];
let mut u16_buf = [0_u8; 2];
let mut u32_buf = [0_u8; 4];
from.read_exact(&mut u32_buf)?;
let n_pages = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u32_buf)?;
let n_links = u32::from_le_bytes(u32_buf);
pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
let n_pages = ioutil::read_u32(from)?;
let n_links = ioutil::read_u32(from)?;
let mut pages = vec![];
for _ in 0..n_pages {
from.read_exact(&mut u32_buf)?;
let link_idx = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u32_buf)?;
let id = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u8_buf)?;
let redirect = u8_buf[0] != 0;
from.read_exact(&mut u16_buf)?;
let title_len = u16::from_le_bytes(u16_buf);
let mut title_bytes = vec![0_u8; title_len as usize];
from.read_exact(&mut title_bytes)?;
let title = String::from_utf8(title_bytes).unwrap();
let page = Page {
link_idx,
id,
title,
redirect,
};
result.pages.push(page);
pages.push(Page::read(from)?);
}
let mut links = vec![];
for _ in 0..n_links {
from.read_exact(&mut u32_buf)?;
let to = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u32_buf)?;
let start = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u32_buf)?;
let end = u32::from_le_bytes(u32_buf);
let link = Link { to, start, end };
result.links.push(link);
links.push(Link::read(from)?);
}
Ok(result)
Ok(Self { pages, links })
}
}