Make adjacency list extensible
This commit is contained in:
parent
78aa27c019
commit
3a75089e5a
2 changed files with 60 additions and 44 deletions
|
|
@ -41,9 +41,9 @@ form a proper adjacency list.
|
|||
|
||||
struct FirstStage {
|
||||
/// List with page info and index into [`Self::links`].
|
||||
pages: Vec<Page>,
|
||||
pages: Vec<Page<()>>,
|
||||
/// List with link info and index into [`Self::titles`].
|
||||
links: Vec<Link>,
|
||||
links: Vec<Link<()>>,
|
||||
/// List with titles.
|
||||
titles: Vec<String>,
|
||||
/// Map from normalized title to index in [`Self::titles`].
|
||||
|
|
@ -80,11 +80,17 @@ impl FirstStage {
|
|||
length,
|
||||
redirect,
|
||||
title,
|
||||
data: (),
|
||||
});
|
||||
}
|
||||
|
||||
fn insert_link(&mut self, to: u32, start: u32, end: u32) {
|
||||
self.links.push(Link { to, start, end });
|
||||
self.links.push(Link {
|
||||
to,
|
||||
start,
|
||||
end,
|
||||
data: (),
|
||||
});
|
||||
}
|
||||
|
||||
fn import_json_page(&mut self, page: JsonPage) {
|
||||
|
|
@ -125,9 +131,9 @@ impl FirstStage {
|
|||
|
||||
struct SecondStage {
|
||||
/// List with page info and index into [`Self::links`].
|
||||
pages: Vec<Page>,
|
||||
pages: Vec<Page<()>>,
|
||||
/// List with link info and index into [`Self::pages`].
|
||||
links: Vec<Link>,
|
||||
links: Vec<Link<()>>,
|
||||
/// Map from normalized title to index in [`Self::pages`].
|
||||
pages_map: FxHashMap<String, u32>,
|
||||
}
|
||||
|
|
@ -141,20 +147,20 @@ impl SecondStage {
|
|||
}
|
||||
}
|
||||
|
||||
fn initialize_pages_map(&mut self, pages: &[Page]) {
|
||||
fn initialize_pages_map(&mut self, pages: &[Page<()>]) {
|
||||
for (idx, page) in pages.iter().enumerate() {
|
||||
let title = util::normalize_link(&page.title);
|
||||
self.pages_map.insert(title, idx as u32);
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_page(&mut self, page: &Page) {
|
||||
fn insert_page(&mut self, page: &Page<()>) {
|
||||
let mut page = page.clone();
|
||||
page.link_idx = self.pages.len() as u32;
|
||||
self.pages.push(page);
|
||||
}
|
||||
|
||||
fn insert_link(&mut self, mut link: Link, titles: &[String]) {
|
||||
fn insert_link(&mut self, mut link: Link<()>, titles: &[String]) {
|
||||
let title = &titles[link.to as usize];
|
||||
if let Some(page_idx) = self.pages_map.get(title) {
|
||||
link.to = *page_idx;
|
||||
|
|
@ -162,7 +168,7 @@ impl SecondStage {
|
|||
}
|
||||
}
|
||||
|
||||
fn finalize(&mut self, pages: &[Page]) {
|
||||
fn finalize(&mut self, pages: &[Page<()>]) {
|
||||
self.insert_page(pages.last().unwrap());
|
||||
}
|
||||
|
||||
|
|
@ -194,7 +200,7 @@ impl SecondStage {
|
|||
result
|
||||
}
|
||||
|
||||
fn into_adjacency_list(self) -> AdjacencyList {
|
||||
fn into_adjacency_list(self) -> AdjacencyList<(), ()> {
|
||||
AdjacencyList {
|
||||
pages: self.pages,
|
||||
links: self.links,
|
||||
|
|
|
|||
|
|
@ -51,15 +51,16 @@ mod ioutil {
|
|||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Page {
|
||||
pub struct Page<P> {
|
||||
pub link_idx: u32,
|
||||
pub id: u32,
|
||||
pub length: u32,
|
||||
pub redirect: bool,
|
||||
pub title: String,
|
||||
pub data: P,
|
||||
}
|
||||
|
||||
impl Page {
|
||||
impl Page<()> {
|
||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||
ioutil::write_u32(self.link_idx, to)?;
|
||||
ioutil::write_u32(self.id, to)?;
|
||||
|
|
@ -83,18 +84,20 @@ impl Page {
|
|||
length,
|
||||
redirect,
|
||||
title,
|
||||
data: (),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct Link {
|
||||
pub struct Link<L> {
|
||||
pub to: u32,
|
||||
pub start: u32,
|
||||
pub end: u32,
|
||||
pub data: L,
|
||||
}
|
||||
|
||||
impl Link {
|
||||
impl Link<()> {
|
||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||
ioutil::write_u32(self.to, to)?;
|
||||
ioutil::write_u32(self.start, to)?;
|
||||
|
|
@ -108,42 +111,22 @@ impl Link {
|
|||
let start = ioutil::read_u32(from)?;
|
||||
let end = ioutil::read_u32(from)?;
|
||||
|
||||
Ok(Self { to, start, end })
|
||||
Ok(Self {
|
||||
to,
|
||||
start,
|
||||
end,
|
||||
data: (),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AdjacencyList {
|
||||
pub pages: Vec<Page>,
|
||||
pub links: Vec<Link>,
|
||||
pub struct AdjacencyList<P, L> {
|
||||
pub pages: Vec<Page<P>>,
|
||||
pub links: Vec<Link<L>>,
|
||||
}
|
||||
|
||||
impl AdjacencyList {
|
||||
pub fn check_consistency(&self) {
|
||||
// Check that all types are large enough
|
||||
assert!(self.pages.len() <= u32::MAX as usize, "pages len");
|
||||
assert!(self.links.len() <= u32::MAX as usize, "links len");
|
||||
for page in &self.pages {
|
||||
assert!(page.link_idx <= u32::MAX as u32, "page link_idx");
|
||||
assert!(page.id <= u32::MAX as u32, "page id");
|
||||
assert!(page.length <= u32::MAX as u32, "page length");
|
||||
assert!(page.title.len() <= u8::MAX as usize, "page title len");
|
||||
}
|
||||
for link in &self.links {
|
||||
assert!(link.to <= u32::MAX as u32, "link to");
|
||||
assert!(link.start <= u32::MAX as u32, "link start");
|
||||
assert!(link.end <= u32::MAX as u32, "link end");
|
||||
}
|
||||
|
||||
// Check that all links contain valid indices
|
||||
let range = 0..self.pages.len() as u32;
|
||||
for link in &self.links {
|
||||
if !range.contains(&link.to) {
|
||||
panic!("Invalid link detected!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AdjacencyList<(), ()> {
|
||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||
ioutil::write_u32(self.pages.len() as u32, to)?;
|
||||
ioutil::write_u32(self.links.len() as u32, to)?;
|
||||
|
|
@ -176,3 +159,30 @@ impl AdjacencyList {
|
|||
Ok(Self { pages, links })
|
||||
}
|
||||
}
|
||||
|
||||
impl<P, L> AdjacencyList<P, L> {
|
||||
pub fn check_consistency(&self) {
|
||||
// Check that all types are large enough
|
||||
assert!(self.pages.len() <= u32::MAX as usize, "pages len");
|
||||
assert!(self.links.len() <= u32::MAX as usize, "links len");
|
||||
for page in &self.pages {
|
||||
assert!(page.link_idx <= u32::MAX as u32, "page link_idx");
|
||||
assert!(page.id <= u32::MAX as u32, "page id");
|
||||
assert!(page.length <= u32::MAX as u32, "page length");
|
||||
assert!(page.title.len() <= u8::MAX as usize, "page title len");
|
||||
}
|
||||
for link in &self.links {
|
||||
assert!(link.to <= u32::MAX as u32, "link to");
|
||||
assert!(link.start <= u32::MAX as u32, "link start");
|
||||
assert!(link.end <= u32::MAX as u32, "link end");
|
||||
}
|
||||
|
||||
// Check that all links contain valid indices
|
||||
let range = 0..self.pages.len() as u32;
|
||||
for link in &self.links {
|
||||
if !range.contains(&link.to) {
|
||||
panic!("Invalid link detected!");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue