Refactor export and add page length

This commit is contained in:
Joscha 2022-10-03 22:14:58 +02:00
parent d910047b48
commit f71092058b
3 changed files with 123 additions and 74 deletions

View file

@ -72,13 +72,14 @@ impl FirstStage {
} }
} }
fn insert_page(&mut self, id: u32, title: String, redirect: bool) { fn insert_page(&mut self, id: u32, length: u32, redirect: bool, title: String) {
let link_idx = self.pages.len() as u32; let link_idx = self.pages.len() as u32;
self.pages.push(Page { self.pages.push(Page {
link_idx, link_idx,
id, id,
title, length,
redirect, redirect,
title,
}); });
} }
@ -87,7 +88,7 @@ impl FirstStage {
} }
fn import_json_page(&mut self, page: JsonPage) { fn import_json_page(&mut self, page: JsonPage) {
self.insert_page(page.id, page.title, page.redirect.is_some()); self.insert_page(page.id, page.length, page.redirect.is_some(), page.title);
for (to, start, end) in page.links { for (to, start, end) in page.links {
let to = self.insert_title(util::normalize_link(&to)); let to = self.insert_title(util::normalize_link(&to));
self.insert_link(to, start, end); self.insert_link(to, start, end);
@ -95,7 +96,12 @@ impl FirstStage {
} }
fn finalize(&mut self) { fn finalize(&mut self) {
self.insert_page(0, "dummy page at the end of all pages".to_string(), false); self.insert_page(
0,
0,
false,
"dummy page at the end of all pages".to_string(),
);
} }
fn from_stdin() -> io::Result<Self> { fn from_stdin() -> io::Result<Self> {

View file

@ -6,15 +6,15 @@ use crate::data::AdjacencyList;
pub fn reexport(from: &Path, to: &Path) -> io::Result<()> { pub fn reexport(from: &Path, to: &Path) -> io::Result<()> {
eprintln!(">> Import"); eprintln!(">> Import");
let from = BufReader::new(File::open(from)?); let mut from = BufReader::new(File::open(from)?);
let data = AdjacencyList::read(from)?; let data = AdjacencyList::read(&mut from)?;
eprintln!(">> Consistency check"); eprintln!(">> Consistency check");
data.check_consistency(); data.check_consistency();
eprintln!(">> Export"); eprintln!(">> Export");
let to = BufWriter::new(File::create(to)?); let mut to = BufWriter::new(File::create(to)?);
data.write(to)?; data.write(&mut to)?;
Ok(()) Ok(())
} }

View file

@ -2,12 +2,89 @@ use std::io::{self, Read, Write};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
/// Small helpers for reading and writing fixed-width little-endian integers
/// and length-prefixed strings.
mod ioutil {
    use std::io::{self, Read, Write};

    /// Writes `n` as a single byte.
    pub fn write_u8<W: Write>(n: u8, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads a single byte.
    pub fn read_u8<R: Read>(from: &mut R) -> io::Result<u8> {
        let mut buf = [0_u8; 1];
        from.read_exact(&mut buf)?;
        Ok(u8::from_le_bytes(buf))
    }

    /// Writes `n` as two little-endian bytes.
    pub fn write_u16<W: Write>(n: u16, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads two bytes and interprets them as a little-endian `u16`.
    pub fn read_u16<R: Read>(from: &mut R) -> io::Result<u16> {
        let mut buf = [0_u8; 2];
        from.read_exact(&mut buf)?;
        Ok(u16::from_le_bytes(buf))
    }

    /// Writes `n` as four little-endian bytes.
    pub fn write_u32<W: Write>(n: u32, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads four bytes and interprets them as a little-endian `u32`.
    pub fn read_u32<R: Read>(from: &mut R) -> io::Result<u32> {
        let mut buf = [0_u8; 4];
        from.read_exact(&mut buf)?;
        Ok(u32::from_le_bytes(buf))
    }

    /// Writes `s` as a little-endian `u16` byte length followed by its UTF-8
    /// bytes.
    ///
    /// # Errors
    ///
    /// Returns [`io::ErrorKind::InvalidInput`] if `s` is longer than
    /// `u16::MAX` bytes; nothing is written in that case. Errors from the
    /// underlying writer are passed through.
    pub fn write_str<W: Write>(s: &str, to: &mut W) -> io::Result<()> {
        // The length prefix is only two bytes wide, so longer strings cannot
        // be represented. Report that to the caller instead of panicking.
        if s.len() > usize::from(u16::MAX) {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!(
                    "string of {} bytes does not fit a u16 length prefix",
                    s.len()
                ),
            ));
        }
        // Lossless: the guard above bounds the length by u16::MAX.
        write_u16(s.len() as u16, to)?;
        to.write_all(s.as_bytes())
    }

    /// Reads a string written by [`write_str`]: a little-endian `u16` byte
    /// length followed by that many bytes of UTF-8.
    ///
    /// # Errors
    ///
    /// Returns [`io::ErrorKind::InvalidData`] if the bytes are not valid
    /// UTF-8. Errors from the underlying reader (including a premature end of
    /// input) are passed through.
    pub fn read_str<R: Read>(from: &mut R) -> io::Result<String> {
        let len = usize::from(read_u16(from)?);
        let mut buf = vec![0_u8; len];
        from.read_exact(&mut buf)?;
        // The bytes come from external input, so malformed data is an error
        // for the caller to handle rather than a reason to abort.
        String::from_utf8(buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
    }
}
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Page { pub struct Page {
pub link_idx: u32, pub link_idx: u32,
pub id: u32, pub id: u32,
pub title: String, pub length: u32,
pub redirect: bool, pub redirect: bool,
pub title: String,
}
impl Page {
/// Serializes this page to `to`.
///
/// Fields are emitted in the order `link_idx`, `id`, `length` (each via
/// `ioutil::write_u32`), then the `redirect` flag as one byte (`1` for
/// `true`, `0` for `false`), and finally `title` via `ioutil::write_str`.
/// The first failing write aborts the sequence and its error is returned.
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
    let redirect_flag = u8::from(self.redirect);

    ioutil::write_u32(self.link_idx, to)?;
    ioutil::write_u32(self.id, to)?;
    ioutil::write_u32(self.length, to)?;
    ioutil::write_u8(redirect_flag, to)?;
    ioutil::write_str(&self.title, to)
}
/// Deserializes a page from `from`.
///
/// Fields are consumed in the order `link_idx`, `id`, `length` (each via
/// `ioutil::read_u32`), then one byte for `redirect` (any non-zero value
/// means `true`), and finally `title` via `ioutil::read_str`. The first
/// failing read aborts the sequence and its error is returned.
pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
    // Struct-literal fields are evaluated in the order written, so the reads
    // below happen in exactly the on-disk field order.
    Ok(Self {
        link_idx: ioutil::read_u32(from)?,
        id: ioutil::read_u32(from)?,
        length: ioutil::read_u32(from)?,
        redirect: ioutil::read_u8(from)? != 0,
        title: ioutil::read_str(from)?,
    })
}
} }
#[derive(Debug, Clone, Copy, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
@ -17,6 +94,24 @@ pub struct Link {
pub end: u32, pub end: u32,
} }
impl Link {
    /// Serializes this link to `to` as three values written with
    /// `ioutil::write_u32`, in the order `to`, `start`, `end`.
    ///
    /// The first failing write aborts the sequence and its error is returned.
    pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
        for field in [self.to, self.start, self.end] {
            ioutil::write_u32(field, to)?;
        }
        Ok(())
    }

    /// Deserializes a link from `from` by reading three values with
    /// `ioutil::read_u32`, in the order `to`, `start`, `end`.
    ///
    /// The first failing read aborts the sequence and its error is returned.
    pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
        // Struct-literal fields are evaluated in the order written, which
        // matches the order used by `write`.
        Ok(Self {
            to: ioutil::read_u32(from)?,
            start: ioutil::read_u32(from)?,
            end: ioutil::read_u32(from)?,
        })
    }
}
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub struct AdjacencyList { pub struct AdjacencyList {
pub pages: Vec<Page>, pub pages: Vec<Page>,
@ -33,87 +128,35 @@ impl AdjacencyList {
} }
} }
pub fn write<W: Write>(&self, mut to: W) -> io::Result<()> { pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
let n_pages: u32 = self.pages.len() as u32; ioutil::write_u32(self.pages.len() as u32, to)?;
to.write_all(&n_pages.to_le_bytes())?; ioutil::write_u32(self.links.len() as u32, to)?;
let n_links: u32 = self.links.len() as u32;
to.write_all(&n_links.to_le_bytes())?;
for page in &self.pages { for page in &self.pages {
to.write_all(&page.link_idx.to_le_bytes())?; page.write(to)?;
to.write_all(&page.id.to_le_bytes())?;
to.write_all(&[if page.redirect { 1 } else { 0 }])?;
let title_len: u16 = page.title.len() as u16;
to.write_all(&title_len.to_le_bytes())?;
to.write_all(page.title.as_bytes())?;
} }
for link in &self.links { for link in &self.links {
to.write_all(&link.to.to_le_bytes())?; link.write(to)?;
to.write_all(&link.start.to_le_bytes())?;
to.write_all(&link.end.to_le_bytes())?;
} }
Ok(()) Ok(())
} }
pub fn read<R: Read>(mut from: R) -> io::Result<Self> { pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
let mut result = Self { let n_pages = ioutil::read_u32(from)?;
pages: vec![], let n_links = ioutil::read_u32(from)?;
links: vec![],
};
let mut u8_buf = [0_u8; 1];
let mut u16_buf = [0_u8; 2];
let mut u32_buf = [0_u8; 4];
from.read_exact(&mut u32_buf)?;
let n_pages = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u32_buf)?;
let n_links = u32::from_le_bytes(u32_buf);
let mut pages = vec![];
for _ in 0..n_pages { for _ in 0..n_pages {
from.read_exact(&mut u32_buf)?; pages.push(Page::read(from)?);
let link_idx = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u32_buf)?;
let id = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u8_buf)?;
let redirect = u8_buf[0] != 0;
from.read_exact(&mut u16_buf)?;
let title_len = u16::from_le_bytes(u16_buf);
let mut title_bytes = vec![0_u8; title_len as usize];
from.read_exact(&mut title_bytes)?;
let title = String::from_utf8(title_bytes).unwrap();
let page = Page {
link_idx,
id,
title,
redirect,
};
result.pages.push(page);
} }
let mut links = vec![];
for _ in 0..n_links { for _ in 0..n_links {
from.read_exact(&mut u32_buf)?; links.push(Link::read(from)?);
let to = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u32_buf)?;
let start = u32::from_le_bytes(u32_buf);
from.read_exact(&mut u32_buf)?;
let end = u32::from_le_bytes(u32_buf);
let link = Link { to, start, end };
result.links.push(link);
} }
Ok(result) Ok(Self { pages, links })
} }
} }