Make adjacency list extensible
This commit is contained in:
parent
78aa27c019
commit
3a75089e5a
2 changed files with 60 additions and 44 deletions
|
|
@ -41,9 +41,9 @@ form a proper adjacency list.
|
||||||
|
|
||||||
struct FirstStage {
|
struct FirstStage {
|
||||||
/// List with page info and index into [`Self::links`].
|
/// List with page info and index into [`Self::links`].
|
||||||
pages: Vec<Page>,
|
pages: Vec<Page<()>>,
|
||||||
/// List with link info and index into [`Self::titles`].
|
/// List with link info and index into [`Self::titles`].
|
||||||
links: Vec<Link>,
|
links: Vec<Link<()>>,
|
||||||
/// List with titles.
|
/// List with titles.
|
||||||
titles: Vec<String>,
|
titles: Vec<String>,
|
||||||
/// Map from normalized title to index in [`Self::titles`].
|
/// Map from normalized title to index in [`Self::titles`].
|
||||||
|
|
@ -80,11 +80,17 @@ impl FirstStage {
|
||||||
length,
|
length,
|
||||||
redirect,
|
redirect,
|
||||||
title,
|
title,
|
||||||
|
data: (),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_link(&mut self, to: u32, start: u32, end: u32) {
|
fn insert_link(&mut self, to: u32, start: u32, end: u32) {
|
||||||
self.links.push(Link { to, start, end });
|
self.links.push(Link {
|
||||||
|
to,
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
data: (),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn import_json_page(&mut self, page: JsonPage) {
|
fn import_json_page(&mut self, page: JsonPage) {
|
||||||
|
|
@ -125,9 +131,9 @@ impl FirstStage {
|
||||||
|
|
||||||
struct SecondStage {
|
struct SecondStage {
|
||||||
/// List with page info and index into [`Self::links`].
|
/// List with page info and index into [`Self::links`].
|
||||||
pages: Vec<Page>,
|
pages: Vec<Page<()>>,
|
||||||
/// List with link info and index into [`Self::pages`].
|
/// List with link info and index into [`Self::pages`].
|
||||||
links: Vec<Link>,
|
links: Vec<Link<()>>,
|
||||||
/// Map from normalized title to index in [`Self::pages`].
|
/// Map from normalized title to index in [`Self::pages`].
|
||||||
pages_map: FxHashMap<String, u32>,
|
pages_map: FxHashMap<String, u32>,
|
||||||
}
|
}
|
||||||
|
|
@ -141,20 +147,20 @@ impl SecondStage {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn initialize_pages_map(&mut self, pages: &[Page]) {
|
fn initialize_pages_map(&mut self, pages: &[Page<()>]) {
|
||||||
for (idx, page) in pages.iter().enumerate() {
|
for (idx, page) in pages.iter().enumerate() {
|
||||||
let title = util::normalize_link(&page.title);
|
let title = util::normalize_link(&page.title);
|
||||||
self.pages_map.insert(title, idx as u32);
|
self.pages_map.insert(title, idx as u32);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_page(&mut self, page: &Page) {
|
fn insert_page(&mut self, page: &Page<()>) {
|
||||||
let mut page = page.clone();
|
let mut page = page.clone();
|
||||||
page.link_idx = self.pages.len() as u32;
|
page.link_idx = self.pages.len() as u32;
|
||||||
self.pages.push(page);
|
self.pages.push(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_link(&mut self, mut link: Link, titles: &[String]) {
|
fn insert_link(&mut self, mut link: Link<()>, titles: &[String]) {
|
||||||
let title = &titles[link.to as usize];
|
let title = &titles[link.to as usize];
|
||||||
if let Some(page_idx) = self.pages_map.get(title) {
|
if let Some(page_idx) = self.pages_map.get(title) {
|
||||||
link.to = *page_idx;
|
link.to = *page_idx;
|
||||||
|
|
@ -162,7 +168,7 @@ impl SecondStage {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finalize(&mut self, pages: &[Page]) {
|
fn finalize(&mut self, pages: &[Page<()>]) {
|
||||||
self.insert_page(pages.last().unwrap());
|
self.insert_page(pages.last().unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -194,7 +200,7 @@ impl SecondStage {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
fn into_adjacency_list(self) -> AdjacencyList {
|
fn into_adjacency_list(self) -> AdjacencyList<(), ()> {
|
||||||
AdjacencyList {
|
AdjacencyList {
|
||||||
pages: self.pages,
|
pages: self.pages,
|
||||||
links: self.links,
|
links: self.links,
|
||||||
|
|
|
||||||
|
|
@ -51,15 +51,16 @@ mod ioutil {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct Page {
|
pub struct Page<P> {
|
||||||
pub link_idx: u32,
|
pub link_idx: u32,
|
||||||
pub id: u32,
|
pub id: u32,
|
||||||
pub length: u32,
|
pub length: u32,
|
||||||
pub redirect: bool,
|
pub redirect: bool,
|
||||||
pub title: String,
|
pub title: String,
|
||||||
|
pub data: P,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Page {
|
impl Page<()> {
|
||||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||||
ioutil::write_u32(self.link_idx, to)?;
|
ioutil::write_u32(self.link_idx, to)?;
|
||||||
ioutil::write_u32(self.id, to)?;
|
ioutil::write_u32(self.id, to)?;
|
||||||
|
|
@ -83,18 +84,20 @@ impl Page {
|
||||||
length,
|
length,
|
||||||
redirect,
|
redirect,
|
||||||
title,
|
title,
|
||||||
|
data: (),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||||
pub struct Link {
|
pub struct Link<L> {
|
||||||
pub to: u32,
|
pub to: u32,
|
||||||
pub start: u32,
|
pub start: u32,
|
||||||
pub end: u32,
|
pub end: u32,
|
||||||
|
pub data: L,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Link {
|
impl Link<()> {
|
||||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||||
ioutil::write_u32(self.to, to)?;
|
ioutil::write_u32(self.to, to)?;
|
||||||
ioutil::write_u32(self.start, to)?;
|
ioutil::write_u32(self.start, to)?;
|
||||||
|
|
@ -108,42 +111,22 @@ impl Link {
|
||||||
let start = ioutil::read_u32(from)?;
|
let start = ioutil::read_u32(from)?;
|
||||||
let end = ioutil::read_u32(from)?;
|
let end = ioutil::read_u32(from)?;
|
||||||
|
|
||||||
Ok(Self { to, start, end })
|
Ok(Self {
|
||||||
|
to,
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
data: (),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub struct AdjacencyList {
|
pub struct AdjacencyList<P, L> {
|
||||||
pub pages: Vec<Page>,
|
pub pages: Vec<Page<P>>,
|
||||||
pub links: Vec<Link>,
|
pub links: Vec<Link<L>>,
|
||||||
}
|
|
||||||
|
|
||||||
impl AdjacencyList {
|
|
||||||
pub fn check_consistency(&self) {
|
|
||||||
// Check that all types are large enough
|
|
||||||
assert!(self.pages.len() <= u32::MAX as usize, "pages len");
|
|
||||||
assert!(self.links.len() <= u32::MAX as usize, "links len");
|
|
||||||
for page in &self.pages {
|
|
||||||
assert!(page.link_idx <= u32::MAX as u32, "page link_idx");
|
|
||||||
assert!(page.id <= u32::MAX as u32, "page id");
|
|
||||||
assert!(page.length <= u32::MAX as u32, "page length");
|
|
||||||
assert!(page.title.len() <= u8::MAX as usize, "page title len");
|
|
||||||
}
|
|
||||||
for link in &self.links {
|
|
||||||
assert!(link.to <= u32::MAX as u32, "link to");
|
|
||||||
assert!(link.start <= u32::MAX as u32, "link start");
|
|
||||||
assert!(link.end <= u32::MAX as u32, "link end");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check that all links contain valid indices
|
|
||||||
let range = 0..self.pages.len() as u32;
|
|
||||||
for link in &self.links {
|
|
||||||
if !range.contains(&link.to) {
|
|
||||||
panic!("Invalid link detected!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl AdjacencyList<(), ()> {
|
||||||
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
pub fn write<W: Write>(&self, to: &mut W) -> io::Result<()> {
|
||||||
ioutil::write_u32(self.pages.len() as u32, to)?;
|
ioutil::write_u32(self.pages.len() as u32, to)?;
|
||||||
ioutil::write_u32(self.links.len() as u32, to)?;
|
ioutil::write_u32(self.links.len() as u32, to)?;
|
||||||
|
|
@ -176,3 +159,30 @@ impl AdjacencyList {
|
||||||
Ok(Self { pages, links })
|
Ok(Self { pages, links })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<P, L> AdjacencyList<P, L> {
|
||||||
|
pub fn check_consistency(&self) {
|
||||||
|
// Check that all types are large enough
|
||||||
|
assert!(self.pages.len() <= u32::MAX as usize, "pages len");
|
||||||
|
assert!(self.links.len() <= u32::MAX as usize, "links len");
|
||||||
|
for page in &self.pages {
|
||||||
|
assert!(page.link_idx <= u32::MAX as u32, "page link_idx");
|
||||||
|
assert!(page.id <= u32::MAX as u32, "page id");
|
||||||
|
assert!(page.length <= u32::MAX as u32, "page length");
|
||||||
|
assert!(page.title.len() <= u8::MAX as usize, "page title len");
|
||||||
|
}
|
||||||
|
for link in &self.links {
|
||||||
|
assert!(link.to <= u32::MAX as u32, "link to");
|
||||||
|
assert!(link.start <= u32::MAX as u32, "link start");
|
||||||
|
assert!(link.end <= u32::MAX as u32, "link end");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that all links contain valid indices
|
||||||
|
let range = 0..self.pages.len() as u32;
|
||||||
|
for link in &self.links {
|
||||||
|
if !range.contains(&link.to) {
|
||||||
|
panic!("Invalid link detected!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue