Refactor export and add page length
This commit is contained in:
parent
d910047b48
commit
f71092058b
3 changed files with 123 additions and 74 deletions
|
|
@ -72,13 +72,14 @@ impl FirstStage {
|
|||
}
|
||||
}
|
||||
|
||||
fn insert_page(&mut self, id: u32, title: String, redirect: bool) {
|
||||
fn insert_page(&mut self, id: u32, length: u32, redirect: bool, title: String) {
|
||||
let link_idx = self.pages.len() as u32;
|
||||
self.pages.push(Page {
|
||||
link_idx,
|
||||
id,
|
||||
title,
|
||||
length,
|
||||
redirect,
|
||||
title,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +88,7 @@ impl FirstStage {
|
|||
}
|
||||
|
||||
fn import_json_page(&mut self, page: JsonPage) {
|
||||
self.insert_page(page.id, page.title, page.redirect.is_some());
|
||||
self.insert_page(page.id, page.length, page.redirect.is_some(), page.title);
|
||||
for (to, start, end) in page.links {
|
||||
let to = self.insert_title(util::normalize_link(&to));
|
||||
self.insert_link(to, start, end);
|
||||
|
|
@ -95,7 +96,12 @@ impl FirstStage {
|
|||
}
|
||||
|
||||
fn finalize(&mut self) {
|
||||
self.insert_page(0, "dummy page at the end of all pages".to_string(), false);
|
||||
self.insert_page(
|
||||
0,
|
||||
0,
|
||||
false,
|
||||
"dummy page at the end of all pages".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
fn from_stdin() -> io::Result<Self> {
|
||||
|
|
|
|||
|
|
@ -6,15 +6,15 @@ use crate::data::AdjacencyList;
|
|||
|
||||
pub fn reexport(from: &Path, to: &Path) -> io::Result<()> {
|
||||
eprintln!(">> Import");
|
||||
let from = BufReader::new(File::open(from)?);
|
||||
let data = AdjacencyList::read(from)?;
|
||||
let mut from = BufReader::new(File::open(from)?);
|
||||
let data = AdjacencyList::read(&mut from)?;
|
||||
|
||||
eprintln!(">> Consistency check");
|
||||
data.check_consistency();
|
||||
|
||||
eprintln!(">> Export");
|
||||
let to = BufWriter::new(File::create(to)?);
|
||||
data.write(to)?;
|
||||
let mut to = BufWriter::new(File::create(to)?);
|
||||
data.write(&mut to)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,12 +2,89 @@ use std::io::{self, Read, Write};
|
|||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Helpers for reading and writing fixed-width little-endian integers and
/// length-prefixed strings on any `Read` / `Write` implementor.
mod ioutil {
    use std::io::{self, Read, Write};

    /// Writes `n` as a single byte.
    pub fn write_u8<W: Write>(n: u8, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads a single byte; fails if the reader cannot supply it.
    pub fn read_u8<R: Read>(from: &mut R) -> io::Result<u8> {
        let mut buf = [0_u8; 1];
        from.read_exact(&mut buf)?;
        Ok(u8::from_le_bytes(buf))
    }

    /// Writes `n` as two little-endian bytes.
    pub fn write_u16<W: Write>(n: u16, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads two bytes and decodes them as a little-endian `u16`.
    pub fn read_u16<R: Read>(from: &mut R) -> io::Result<u16> {
        let mut buf = [0_u8; 2];
        from.read_exact(&mut buf)?;
        Ok(u16::from_le_bytes(buf))
    }

    /// Writes `n` as four little-endian bytes.
    pub fn write_u32<W: Write>(n: u32, to: &mut W) -> io::Result<()> {
        to.write_all(&n.to_le_bytes())
    }

    /// Reads four bytes and decodes them as a little-endian `u32`.
    pub fn read_u32<R: Read>(from: &mut R) -> io::Result<u32> {
        let mut buf = [0_u8; 4];
        from.read_exact(&mut buf)?;
        Ok(u32::from_le_bytes(buf))
    }

    /// Writes `s` as a `u16` byte-length prefix followed by its UTF-8 bytes.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::InvalidInput` (and writes nothing) when `s` is
    /// longer than `u16::MAX` bytes, because the prefix could not represent
    /// its length. Errors from the underlying writer are propagated.
    pub fn write_str<W: Write>(s: &str, to: &mut W) -> io::Result<()> {
        if s.len() > usize::from(u16::MAX) {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "string is too long for a u16 length prefix",
            ));
        }

        // The check above guarantees this cast cannot truncate.
        write_u16(s.len() as u16, to)?;
        to.write_all(s.as_bytes())
    }

    /// Reads a string written by [`write_str`]: a `u16` byte length followed
    /// by that many bytes of UTF-8.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::InvalidData` when the payload is not valid UTF-8.
    /// Errors from the underlying reader (including a short read) are
    /// propagated.
    pub fn read_str<R: Read>(from: &mut R) -> io::Result<String> {
        let len = usize::from(read_u16(from)?);
        let mut buf = vec![0_u8; len];
        from.read_exact(&mut buf)?;
        // The bytes come from external input, so report bad data instead of panicking.
        String::from_utf8(buf).map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
    }
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Page {
|
||||
pub link_idx: u32,
|
||||
pub id: u32,
|
||||
pub title: String,
|
||||
pub length: u32,
|
||||
pub redirect: bool,
|
||||
pub title: String,
|
||||
}
|
||||
|
||||
impl Page {
|
||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||
ioutil::write_u32(self.link_idx, to)?;
|
||||
ioutil::write_u32(self.id, to)?;
|
||||
ioutil::write_u32(self.length, to)?;
|
||||
ioutil::write_u8(if self.redirect { 1 } else { 0 }, to)?;
|
||||
ioutil::write_str(&self.title, to)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
|
||||
let link_idx = ioutil::read_u32(from)?;
|
||||
let id = ioutil::read_u32(from)?;
|
||||
let length = ioutil::read_u32(from)?;
|
||||
let redirect = ioutil::read_u8(from)? != 0;
|
||||
let title = ioutil::read_str(from)?;
|
||||
|
||||
Ok(Self {
|
||||
link_idx,
|
||||
id,
|
||||
length,
|
||||
redirect,
|
||||
title,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
|
|
@ -17,6 +94,24 @@ pub struct Link {
|
|||
pub end: u32,
|
||||
}
|
||||
|
||||
impl Link {
|
||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||
ioutil::write_u32(self.to, to)?;
|
||||
ioutil::write_u32(self.start, to)?;
|
||||
ioutil::write_u32(self.end, to)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
|
||||
let to = ioutil::read_u32(from)?;
|
||||
let start = ioutil::read_u32(from)?;
|
||||
let end = ioutil::read_u32(from)?;
|
||||
|
||||
Ok(Self { to, start, end })
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AdjacencyList {
|
||||
pub pages: Vec<Page>,
|
||||
|
|
@ -33,87 +128,35 @@ impl AdjacencyList {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn write<W: Write>(&self, mut to: W) -> io::Result<()> {
|
||||
let n_pages: u32 = self.pages.len() as u32;
|
||||
to.write_all(&n_pages.to_le_bytes())?;
|
||||
|
||||
let n_links: u32 = self.links.len() as u32;
|
||||
to.write_all(&n_links.to_le_bytes())?;
|
||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||
ioutil::write_u32(self.pages.len() as u32, to)?;
|
||||
ioutil::write_u32(self.links.len() as u32, to)?;
|
||||
|
||||
for page in &self.pages {
|
||||
to.write_all(&page.link_idx.to_le_bytes())?;
|
||||
to.write_all(&page.id.to_le_bytes())?;
|
||||
to.write_all(&[if page.redirect { 1 } else { 0 }])?;
|
||||
|
||||
let title_len: u16 = page.title.len() as u16;
|
||||
to.write_all(&title_len.to_le_bytes())?;
|
||||
to.write_all(page.title.as_bytes())?;
|
||||
page.write(to)?;
|
||||
}
|
||||
|
||||
for link in &self.links {
|
||||
to.write_all(&link.to.to_le_bytes())?;
|
||||
to.write_all(&link.start.to_le_bytes())?;
|
||||
to.write_all(&link.end.to_le_bytes())?;
|
||||
link.write(to)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn read<R: Read>(mut from: R) -> io::Result<Self> {
|
||||
let mut result = Self {
|
||||
pages: vec![],
|
||||
links: vec![],
|
||||
};
|
||||
|
||||
let mut u8_buf = [0_u8; 1];
|
||||
let mut u16_buf = [0_u8; 2];
|
||||
let mut u32_buf = [0_u8; 4];
|
||||
|
||||
from.read_exact(&mut u32_buf)?;
|
||||
let n_pages = u32::from_le_bytes(u32_buf);
|
||||
|
||||
from.read_exact(&mut u32_buf)?;
|
||||
let n_links = u32::from_le_bytes(u32_buf);
|
||||
pub fn read<R: Read>(from: &mut R) -> io::Result<Self> {
|
||||
let n_pages = ioutil::read_u32(from)?;
|
||||
let n_links = ioutil::read_u32(from)?;
|
||||
|
||||
let mut pages = vec![];
|
||||
for _ in 0..n_pages {
|
||||
from.read_exact(&mut u32_buf)?;
|
||||
let link_idx = u32::from_le_bytes(u32_buf);
|
||||
|
||||
from.read_exact(&mut u32_buf)?;
|
||||
let id = u32::from_le_bytes(u32_buf);
|
||||
|
||||
from.read_exact(&mut u8_buf)?;
|
||||
let redirect = u8_buf[0] != 0;
|
||||
|
||||
from.read_exact(&mut u16_buf)?;
|
||||
let title_len = u16::from_le_bytes(u16_buf);
|
||||
let mut title_bytes = vec![0_u8; title_len as usize];
|
||||
from.read_exact(&mut title_bytes)?;
|
||||
let title = String::from_utf8(title_bytes).unwrap();
|
||||
|
||||
let page = Page {
|
||||
link_idx,
|
||||
id,
|
||||
title,
|
||||
redirect,
|
||||
};
|
||||
result.pages.push(page);
|
||||
pages.push(Page::read(from)?);
|
||||
}
|
||||
|
||||
let mut links = vec![];
|
||||
for _ in 0..n_links {
|
||||
from.read_exact(&mut u32_buf)?;
|
||||
let to = u32::from_le_bytes(u32_buf);
|
||||
|
||||
from.read_exact(&mut u32_buf)?;
|
||||
let start = u32::from_le_bytes(u32_buf);
|
||||
|
||||
from.read_exact(&mut u32_buf)?;
|
||||
let end = u32::from_le_bytes(u32_buf);
|
||||
|
||||
let link = Link { to, start, end };
|
||||
result.links.push(link);
|
||||
links.push(Link::read(from)?);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
Ok(Self { pages, links })
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue