Remove PageIdx and LinkIdx again
I don't think the type safety is worth the effort right now.
This commit is contained in:
parent
17b118693f
commit
76abf5ea6e
7 changed files with 113 additions and 163 deletions
|
|
@ -7,9 +7,7 @@ use std::u32;
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::FxHashMap;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
use crate::data::adjacency_list::{
|
use crate::data::adjacency_list::{AdjacencyList, Page};
|
||||||
AdjacencyList, Link, LinkIdx, Page, PageIdx, SENTINEL_PAGE_MARKER,
|
|
||||||
};
|
|
||||||
use crate::data::info::{LinkInfo, PageInfo};
|
use crate::data::info::{LinkInfo, PageInfo};
|
||||||
use crate::data::store;
|
use crate::data::store;
|
||||||
use crate::util;
|
use crate::util;
|
||||||
|
|
@ -74,29 +72,20 @@ fn first_stage() -> io::Result<(AdjacencyList<PageInfo, LinkInfo>, Titles)> {
|
||||||
for (i, line) in stdin.lines().enumerate() {
|
for (i, line) in stdin.lines().enumerate() {
|
||||||
let json_page = serde_json::from_str::<JsonPage>(&line?).unwrap();
|
let json_page = serde_json::from_str::<JsonPage>(&line?).unwrap();
|
||||||
|
|
||||||
result.pages.push(Page {
|
result.push_page(PageInfo {
|
||||||
start: LinkIdx(result.links.len() as u32),
|
id: json_page.id,
|
||||||
data: PageInfo {
|
length: json_page.length,
|
||||||
id: json_page.id,
|
redirect: json_page.redirect.is_some(),
|
||||||
length: json_page.length,
|
title: json_page.title,
|
||||||
redirect: json_page.redirect.is_some(),
|
|
||||||
title: json_page.title,
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if let Some(to) = json_page.redirect {
|
if let Some(to) = json_page.redirect {
|
||||||
let to = titles.insert(util::normalize_link(&to));
|
let to = titles.insert(util::normalize_link(&to));
|
||||||
result.links.push(Link {
|
result.push_link(to, LinkInfo::default());
|
||||||
to: PageIdx(to),
|
|
||||||
data: LinkInfo::default(),
|
|
||||||
});
|
|
||||||
} else {
|
} else {
|
||||||
for (to, start, len, flags) in json_page.links {
|
for (to, start, len, flags) in json_page.links {
|
||||||
let to = titles.insert(util::normalize_link(&to));
|
let to = titles.insert(util::normalize_link(&to));
|
||||||
result.links.push(Link {
|
result.push_link(to, LinkInfo { start, len, flags });
|
||||||
to: PageIdx(to),
|
|
||||||
data: LinkInfo { start, len, flags },
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -110,16 +99,6 @@ fn first_stage() -> io::Result<(AdjacencyList<PageInfo, LinkInfo>, Titles)> {
|
||||||
eprintln!("Titles: {}", titles.titles.len());
|
eprintln!("Titles: {}", titles.titles.len());
|
||||||
eprintln!("Title map entries: {}", titles.map.len());
|
eprintln!("Title map entries: {}", titles.map.len());
|
||||||
|
|
||||||
result.pages.push(Page {
|
|
||||||
start: LinkIdx(result.links.len() as u32),
|
|
||||||
data: PageInfo {
|
|
||||||
id: u32::MAX,
|
|
||||||
length: 0,
|
|
||||||
redirect: false,
|
|
||||||
title: SENTINEL_PAGE_MARKER.to_string(),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok((result, titles))
|
Ok((result, titles))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -151,26 +130,19 @@ fn second_stage(
|
||||||
let pages_map = initialize_pages_map(&first_stage.pages);
|
let pages_map = initialize_pages_map(&first_stage.pages);
|
||||||
let mut result = AdjacencyList::default();
|
let mut result = AdjacencyList::default();
|
||||||
|
|
||||||
for page_idx in 0..first_stage.pages.len() - 1 {
|
for (page_idx, page) in first_stage.pages() {
|
||||||
let mut page = first_stage.pages[page_idx].clone();
|
result.push_page(page.data.clone());
|
||||||
let start_link_idx = page.start;
|
|
||||||
let end_link_idx = first_stage.pages[page_idx + 1].start;
|
|
||||||
|
|
||||||
page.start.0 = result.links.len() as u32;
|
for (_, link) in first_stage.links(page_idx) {
|
||||||
result.pages.push(page);
|
let title = util::normalize_link(titles.get(link.to));
|
||||||
|
|
||||||
for link_idx in start_link_idx.0..end_link_idx.0 {
|
|
||||||
let mut link = first_stage.links[link_idx as usize];
|
|
||||||
let title = util::normalize_link(titles.get(link.to.0));
|
|
||||||
if let Some(to) = pages_map.get(&title) {
|
if let Some(to) = pages_map.get(&title) {
|
||||||
// The link points to an existing article, we should keep it
|
// The link points to an existing article, we should keep it
|
||||||
link.to.0 = *to;
|
result.push_link(*to, link.data);
|
||||||
result.links.push(link);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (page_idx + 1) % 100_000 == 0 {
|
if (page_idx + 1) % 100_000 == 0 {
|
||||||
eprintln!("{} pages processed", page_idx + 1)
|
eprintln!("{} pages imported", page_idx + 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -178,10 +150,6 @@ fn second_stage(
|
||||||
eprintln!("Links: {}", result.links.len());
|
eprintln!("Links: {}", result.links.len());
|
||||||
eprintln!("Page map entries: {}", pages_map.len());
|
eprintln!("Page map entries: {}", pages_map.len());
|
||||||
|
|
||||||
let mut sentinel = first_stage.pages.last().unwrap().clone();
|
|
||||||
sentinel.start.0 = result.links.len() as u32;
|
|
||||||
result.pages.push(sentinel);
|
|
||||||
|
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,15 +2,13 @@ use std::fs::File;
|
||||||
use std::io::{self, BufReader};
|
use std::io::{self, BufReader};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::data::adjacency_list::PageIdx;
|
|
||||||
use crate::data::store;
|
use crate::data::store;
|
||||||
|
|
||||||
pub fn run(datafile: &Path) -> io::Result<()> {
|
pub fn run(datafile: &Path) -> io::Result<()> {
|
||||||
let mut databuf = BufReader::new(File::open(datafile)?);
|
let mut databuf = BufReader::new(File::open(datafile)?);
|
||||||
let data = store::read_adjacency_list(&mut databuf)?;
|
let data = store::read_adjacency_list(&mut databuf)?;
|
||||||
|
|
||||||
for (page_idx, page) in data.pages.iter().enumerate() {
|
for (page_idx, page) in data.pages() {
|
||||||
let page_idx = PageIdx(page_idx as u32);
|
|
||||||
if page.data.redirect {
|
if page.data.redirect {
|
||||||
for link_idx in data.link_range(page_idx) {
|
for link_idx in data.link_range(page_idx) {
|
||||||
let target_page = data.page(data.link(link_idx).to);
|
let target_page = data.page(data.link(link_idx).to);
|
||||||
|
|
|
||||||
|
|
@ -3,14 +3,15 @@ use std::fs::File;
|
||||||
use std::io::{self, BufReader};
|
use std::io::{self, BufReader};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::data::adjacency_list::{AdjacencyList, PageIdx};
|
use crate::data::adjacency_list::AdjacencyList;
|
||||||
use crate::data::info::{LinkInfo, PageInfo};
|
use crate::data::info::{LinkInfo, PageInfo};
|
||||||
use crate::data::store;
|
use crate::data::store;
|
||||||
use crate::util;
|
use crate::util;
|
||||||
|
|
||||||
struct DijkstraPageInfo {
|
struct DijkstraPageInfo {
|
||||||
cost: u32,
|
cost: u32,
|
||||||
prev: PageIdx,
|
/// Index of the previous page.
|
||||||
|
prev: u32,
|
||||||
redirect: bool,
|
redirect: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -18,7 +19,7 @@ impl DijkstraPageInfo {
|
||||||
fn from_page_info(info: PageInfo) -> Self {
|
fn from_page_info(info: PageInfo) -> Self {
|
||||||
Self {
|
Self {
|
||||||
cost: u32::MAX,
|
cost: u32::MAX,
|
||||||
prev: PageIdx::MAX,
|
prev: u32::MAX,
|
||||||
redirect: info.redirect,
|
redirect: info.redirect,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -42,12 +43,12 @@ impl DijkstraLinkInfo {
|
||||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||||
struct Entry {
|
struct Entry {
|
||||||
cost: u32,
|
cost: u32,
|
||||||
idx: PageIdx,
|
page_idx: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Entry {
|
impl Entry {
|
||||||
pub fn new(cost: u32, idx: PageIdx) -> Self {
|
pub fn new(cost: u32, page_idx: u32) -> Self {
|
||||||
Self { cost, idx }
|
Self { cost, page_idx }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -57,7 +58,7 @@ impl Ord for Entry {
|
||||||
other
|
other
|
||||||
.cost
|
.cost
|
||||||
.cmp(&self.cost)
|
.cmp(&self.cost)
|
||||||
.then_with(|| self.idx.cmp(&other.idx))
|
.then_with(|| self.page_idx.cmp(&other.page_idx))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -70,22 +71,18 @@ impl PartialOrd for Entry {
|
||||||
/// Closely matches the dijkstra example in [std::collections::binary_heap].
|
/// Closely matches the dijkstra example in [std::collections::binary_heap].
|
||||||
fn full_dijkstra(
|
fn full_dijkstra(
|
||||||
data: AdjacencyList<PageInfo, LinkInfo>,
|
data: AdjacencyList<PageInfo, LinkInfo>,
|
||||||
from_idx: PageIdx,
|
from: u32,
|
||||||
) -> AdjacencyList<DijkstraPageInfo, DijkstraLinkInfo> {
|
) -> AdjacencyList<DijkstraPageInfo, DijkstraLinkInfo> {
|
||||||
println!("> Prepare state");
|
println!("> Prepare state");
|
||||||
let mut data = data
|
let mut data = data
|
||||||
.change_page_data(DijkstraPageInfo::from_page_info)
|
.change_page_data(DijkstraPageInfo::from_page_info)
|
||||||
.change_link_data(DijkstraLinkInfo::from_link_info);
|
.change_link_data(DijkstraLinkInfo::from_link_info);
|
||||||
let mut queue = BinaryHeap::new();
|
let mut queue = BinaryHeap::new();
|
||||||
data.page_mut(from_idx).data.cost = 0;
|
data.page_mut(from).data.cost = 0;
|
||||||
queue.push(Entry::new(0, from_idx));
|
queue.push(Entry::new(0, from));
|
||||||
|
|
||||||
println!("> Run dijkstra");
|
println!("> Run dijkstra");
|
||||||
while let Some(Entry {
|
while let Some(Entry { cost, page_idx }) = queue.pop() {
|
||||||
cost,
|
|
||||||
idx: page_idx,
|
|
||||||
}) = queue.pop()
|
|
||||||
{
|
|
||||||
let page = data.page(page_idx);
|
let page = data.page(page_idx);
|
||||||
if cost > page.data.cost {
|
if cost > page.data.cost {
|
||||||
// This queue entry is outdated
|
// This queue entry is outdated
|
||||||
|
|
@ -98,7 +95,7 @@ fn full_dijkstra(
|
||||||
|
|
||||||
let next = Entry {
|
let next = Entry {
|
||||||
cost: cost + if redirect { 0 } else { link.data.cost },
|
cost: cost + if redirect { 0 } else { link.data.cost },
|
||||||
idx: link.to,
|
page_idx: link.to,
|
||||||
};
|
};
|
||||||
|
|
||||||
let target_page = data.page_mut(link.to);
|
let target_page = data.page_mut(link.to);
|
||||||
|
|
@ -115,23 +112,22 @@ fn full_dijkstra(
|
||||||
|
|
||||||
fn find_longest_shortest_path(
|
fn find_longest_shortest_path(
|
||||||
data: AdjacencyList<DijkstraPageInfo, DijkstraLinkInfo>,
|
data: AdjacencyList<DijkstraPageInfo, DijkstraLinkInfo>,
|
||||||
from: PageIdx,
|
from: u32,
|
||||||
) -> Option<Vec<PageIdx>> {
|
) -> Option<Vec<u32>> {
|
||||||
let to = PageIdx(
|
let to = data
|
||||||
data.pages
|
.pages
|
||||||
.iter()
|
.iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.filter(|(_, p)| p.data.cost != u32::MAX)
|
.filter(|(_, p)| p.data.cost != u32::MAX)
|
||||||
.max_by_key(|(_, p)| p.data.cost)?
|
.max_by_key(|(_, p)| p.data.cost)?
|
||||||
.0 as u32,
|
.0 as u32;
|
||||||
);
|
|
||||||
|
|
||||||
let mut steps = vec![];
|
let mut steps = vec![];
|
||||||
let mut at = to;
|
let mut at = to;
|
||||||
loop {
|
loop {
|
||||||
steps.push(at);
|
steps.push(at);
|
||||||
at = data.page(at).data.prev;
|
at = data.page(at).data.prev;
|
||||||
if at == PageIdx::MAX {
|
if at == u32::MAX {
|
||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -162,7 +158,7 @@ pub fn run(datafile: &Path, from: &str) -> io::Result<()> {
|
||||||
if let Some(path) = path {
|
if let Some(path) = path {
|
||||||
println!("Path found:");
|
println!("Path found:");
|
||||||
for page_idx in path {
|
for page_idx in path {
|
||||||
let page = &pages[page_idx.0 as usize];
|
let page = &pages[page_idx as usize];
|
||||||
if page.data.redirect {
|
if page.data.redirect {
|
||||||
println!(" v {:?}", page.data.title);
|
println!(" v {:?}", page.data.title);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -3,14 +3,14 @@ use std::fs::File;
|
||||||
use std::io::{self, BufReader};
|
use std::io::{self, BufReader};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::data::adjacency_list::{AdjacencyList, PageIdx};
|
use crate::data::adjacency_list::AdjacencyList;
|
||||||
use crate::data::info::{LinkInfo, PageInfo};
|
use crate::data::info::{LinkInfo, PageInfo};
|
||||||
use crate::data::store;
|
use crate::data::store;
|
||||||
use crate::util;
|
use crate::util;
|
||||||
|
|
||||||
struct DijkstraPageInfo {
|
struct DijkstraPageInfo {
|
||||||
cost: u32,
|
cost: u32,
|
||||||
prev: PageIdx,
|
prev: u32,
|
||||||
redirect: bool,
|
redirect: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -18,7 +18,7 @@ impl DijkstraPageInfo {
|
||||||
fn from_page_info(info: PageInfo) -> Self {
|
fn from_page_info(info: PageInfo) -> Self {
|
||||||
Self {
|
Self {
|
||||||
cost: u32::MAX,
|
cost: u32::MAX,
|
||||||
prev: PageIdx::MAX,
|
prev: u32::MAX,
|
||||||
redirect: info.redirect,
|
redirect: info.redirect,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -42,12 +42,12 @@ impl DijkstraLinkInfo {
|
||||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||||
struct Entry {
|
struct Entry {
|
||||||
cost: u32,
|
cost: u32,
|
||||||
idx: PageIdx,
|
page_idx: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Entry {
|
impl Entry {
|
||||||
pub fn new(cost: u32, idx: PageIdx) -> Self {
|
pub fn new(cost: u32, page_idx: u32) -> Self {
|
||||||
Self { cost, idx }
|
Self { cost, page_idx }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -57,7 +57,7 @@ impl Ord for Entry {
|
||||||
other
|
other
|
||||||
.cost
|
.cost
|
||||||
.cmp(&self.cost)
|
.cmp(&self.cost)
|
||||||
.then_with(|| self.idx.cmp(&other.idx))
|
.then_with(|| self.page_idx.cmp(&other.page_idx))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -68,11 +68,7 @@ impl PartialOrd for Entry {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Closely matches the dijkstra example in [std::collections::binary_heap].
|
/// Closely matches the dijkstra example in [std::collections::binary_heap].
|
||||||
fn dijkstra(
|
fn dijkstra(data: AdjacencyList<PageInfo, LinkInfo>, from: u32, to: u32) -> Option<Vec<u32>> {
|
||||||
data: AdjacencyList<PageInfo, LinkInfo>,
|
|
||||||
from: PageIdx,
|
|
||||||
to: PageIdx,
|
|
||||||
) -> Option<Vec<PageIdx>> {
|
|
||||||
println!("> Prepare state");
|
println!("> Prepare state");
|
||||||
let mut data = data
|
let mut data = data
|
||||||
.change_page_data(DijkstraPageInfo::from_page_info)
|
.change_page_data(DijkstraPageInfo::from_page_info)
|
||||||
|
|
@ -82,11 +78,7 @@ fn dijkstra(
|
||||||
queue.push(Entry::new(0, from));
|
queue.push(Entry::new(0, from));
|
||||||
|
|
||||||
println!("> Run dijkstra");
|
println!("> Run dijkstra");
|
||||||
while let Some(Entry {
|
while let Some(Entry { cost, page_idx }) = queue.pop() {
|
||||||
cost,
|
|
||||||
idx: page_idx,
|
|
||||||
}) = queue.pop()
|
|
||||||
{
|
|
||||||
if page_idx == to {
|
if page_idx == to {
|
||||||
// We've found the shortest path to our target
|
// We've found the shortest path to our target
|
||||||
break;
|
break;
|
||||||
|
|
@ -104,7 +96,7 @@ fn dijkstra(
|
||||||
|
|
||||||
let next = Entry {
|
let next = Entry {
|
||||||
cost: cost + if redirect { 0 } else { link.data.cost },
|
cost: cost + if redirect { 0 } else { link.data.cost },
|
||||||
idx: link.to,
|
page_idx: link.to,
|
||||||
};
|
};
|
||||||
|
|
||||||
let target_page = data.page_mut(link.to);
|
let target_page = data.page_mut(link.to);
|
||||||
|
|
@ -122,7 +114,7 @@ fn dijkstra(
|
||||||
loop {
|
loop {
|
||||||
steps.push(at);
|
steps.push(at);
|
||||||
at = data.page(at).data.prev;
|
at = data.page(at).data.prev;
|
||||||
if at == PageIdx::MAX {
|
if at == u32::MAX {
|
||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -152,7 +144,7 @@ pub fn path(datafile: &Path, from: &str, to: &str) -> io::Result<()> {
|
||||||
if let Some(path) = path {
|
if let Some(path) = path {
|
||||||
println!("Path found:");
|
println!("Path found:");
|
||||||
for page_idx in path {
|
for page_idx in path {
|
||||||
let page = &pages[page_idx.0 as usize];
|
let page = &pages[page_idx as usize];
|
||||||
if page.data.redirect {
|
if page.data.redirect {
|
||||||
println!(" v {:?}", page.data.title);
|
println!(" v {:?}", page.data.title);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,11 @@
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
use super::info::{LinkInfo, PageInfo};
|
use super::info::{LinkInfo, PageInfo};
|
||||||
|
|
||||||
pub const SENTINEL_PAGE_MARKER: &str = "Q2AKO3OYzyitmCJURghJ";
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
||||||
pub struct PageIdx(pub u32);
|
|
||||||
|
|
||||||
impl PageIdx {
|
|
||||||
pub const MAX: PageIdx = PageIdx(u32::MAX);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
||||||
pub struct LinkIdx(pub u32);
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub struct Page<P> {
|
pub struct Page<P> {
|
||||||
pub start: LinkIdx,
|
/// Index of the first link belonging to this page.
|
||||||
|
pub start: u32,
|
||||||
pub data: P,
|
pub data: P,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -29,7 +20,8 @@ impl<P> Page<P> {
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub struct Link<L> {
|
pub struct Link<L> {
|
||||||
pub to: PageIdx,
|
/// Index of the page this link points to.
|
||||||
|
pub to: u32,
|
||||||
pub data: L,
|
pub data: L,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -57,40 +49,57 @@ impl<P, L> Default for AdjacencyList<P, L> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<P, L> AdjacencyList<P, L> {
|
impl<P, L> AdjacencyList<P, L> {
|
||||||
pub fn page(&self, idx: PageIdx) -> &Page<P> {
|
pub fn push_page(&mut self, data: P) {
|
||||||
&self.pages[idx.0 as usize]
|
self.pages.push(Page {
|
||||||
|
start: self.links.len() as u32,
|
||||||
|
data,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn page_mut(&mut self, idx: PageIdx) -> &mut Page<P> {
|
pub fn push_link(&mut self, to: u32, data: L) {
|
||||||
&mut self.pages[idx.0 as usize]
|
self.links.push(Link { to, data })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn pages_range(&self) -> impl DoubleEndedIterator<Item = PageIdx> {
|
pub fn page(&self, page_idx: u32) -> &Page<P> {
|
||||||
(0..self.pages.len() as u32 - 1).map(PageIdx)
|
&self.pages[page_idx as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn link_range(&self, idx: PageIdx) -> impl DoubleEndedIterator<Item = LinkIdx> {
|
pub fn page_mut(&mut self, page_idx: u32) -> &mut Page<P> {
|
||||||
let start_idx = self.page(idx).start;
|
&mut self.pages[page_idx as usize]
|
||||||
let end_idx = self.page(PageIdx(idx.0 + 1)).start;
|
|
||||||
(start_idx.0..end_idx.0).map(LinkIdx)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn link_redirect(&self, idx: PageIdx) -> Option<LinkIdx> {
|
pub fn pages(&self) -> impl Iterator<Item = (u32, &Page<P>)> {
|
||||||
let start_idx = self.page(idx).start;
|
self.pages.iter().enumerate().map(|(i, p)| (i as u32, p))
|
||||||
let end_idx = self.page(PageIdx(idx.0 + 1)).start;
|
}
|
||||||
if start_idx == end_idx {
|
|
||||||
|
pub fn link(&self, link_idx: u32) -> &Link<L> {
|
||||||
|
&self.links[link_idx as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn link_mut(&mut self, link_idx: u32) -> &mut Link<L> {
|
||||||
|
&mut self.links[link_idx as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn link_range(&self, page_idx: u32) -> Range<u32> {
|
||||||
|
let start_idx = self.pages[page_idx as usize].start;
|
||||||
|
let end_idx = match self.pages.get(page_idx as usize + 1) {
|
||||||
|
Some(page) => page.start,
|
||||||
|
None => self.links.len() as u32,
|
||||||
|
};
|
||||||
|
start_idx..end_idx
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn link_redirect(&self, page_idx: u32) -> Option<u32> {
|
||||||
|
let range = self.link_range(page_idx);
|
||||||
|
if range.is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
Some(start_idx)
|
Some(range.start)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn link(&self, idx: LinkIdx) -> &Link<L> {
|
pub fn links(&self, page_idx: u32) -> impl Iterator<Item = (u32, &Link<L>)> {
|
||||||
&self.links[idx.0 as usize]
|
self.link_range(page_idx).map(|i| (i, self.link(i)))
|
||||||
}
|
|
||||||
|
|
||||||
pub fn link_mut(&mut self, idx: LinkIdx) -> &mut Link<L> {
|
|
||||||
&mut self.links[idx.0 as usize]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn change_page_data<P2>(self, page_f: impl Fn(P) -> P2 + Copy) -> AdjacencyList<P2, L> {
|
pub fn change_page_data<P2>(self, page_f: impl Fn(P) -> P2 + Copy) -> AdjacencyList<P2, L> {
|
||||||
|
|
@ -122,14 +131,6 @@ impl<P, L> AdjacencyList<P, L> {
|
||||||
|
|
||||||
impl AdjacencyList<PageInfo, LinkInfo> {
|
impl AdjacencyList<PageInfo, LinkInfo> {
|
||||||
pub fn check_consistency(&self) {
|
pub fn check_consistency(&self) {
|
||||||
// Check that we have a sentinel page
|
|
||||||
let sentinel = self.pages.last().expect("no sentinel page");
|
|
||||||
assert!(sentinel.data.id == u32::MAX, "unmarked sentinel page");
|
|
||||||
assert!(
|
|
||||||
sentinel.data.title.contains(SENTINEL_PAGE_MARKER),
|
|
||||||
"unmarked sentinel page"
|
|
||||||
);
|
|
||||||
|
|
||||||
// Check that all types are large enough
|
// Check that all types are large enough
|
||||||
assert!(self.pages.len() < u32::MAX as usize, "too many pages");
|
assert!(self.pages.len() < u32::MAX as usize, "too many pages");
|
||||||
assert!(self.links.len() < u32::MAX as usize, "too many links");
|
assert!(self.links.len() < u32::MAX as usize, "too many links");
|
||||||
|
|
@ -142,18 +143,17 @@ impl AdjacencyList<PageInfo, LinkInfo> {
|
||||||
|
|
||||||
// Check that all links contain valid indices. Links must not link to
|
// Check that all links contain valid indices, i.e. that every link points
|
||||||
// the sentinel page.
|
// to an existing page.
|
||||||
let range = 0..self.pages.len() as u32 - 1;
|
let range = 0..self.pages.len() as u32;
|
||||||
for link in &self.links {
|
for link in &self.links {
|
||||||
assert!(range.contains(&link.to.0), "invalid link");
|
assert!(range.contains(&link.to), "invalid link");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that all redirect pages have at most one link
|
// Check that all redirect pages have at most one link
|
||||||
for page_idx in (0..self.pages.len() as u32 - 1).map(PageIdx) {
|
for (page_idx, page) in self.pages.iter().enumerate() {
|
||||||
let page = self.page(page_idx);
|
|
||||||
if page.data.redirect {
|
if page.data.redirect {
|
||||||
let mut range = self.link_range(page_idx);
|
let range = self.link_range(page_idx as u32);
|
||||||
range.next(); // 0 or 1 links allowed
|
let amount = range.end - range.start;
|
||||||
assert!(range.next().is_none(), "too many redirect links");
|
assert!(amount <= 1, "too many redirect links");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use std::io::{self, Read, Write};
|
use std::io::{self, Read, Write};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
adjacency_list::{AdjacencyList, Link, LinkIdx, Page, PageIdx},
|
adjacency_list::{AdjacencyList, Link, Page},
|
||||||
info::{LinkInfo, PageInfo},
|
info::{LinkInfo, PageInfo},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -50,7 +50,7 @@ fn read_str<R: Read>(from: &mut R) -> io::Result<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_page<W: Write>(page: &Page<PageInfo>, to: &mut W) -> io::Result<()> {
|
fn write_page<W: Write>(page: &Page<PageInfo>, to: &mut W) -> io::Result<()> {
|
||||||
write_u32(page.start.0, to)?;
|
write_u32(page.start, to)?;
|
||||||
write_u32(page.data.id, to)?;
|
write_u32(page.data.id, to)?;
|
||||||
write_u32(page.data.length, to)?;
|
write_u32(page.data.length, to)?;
|
||||||
write_u8(if page.data.redirect { 1 } else { 0 }, to)?;
|
write_u8(if page.data.redirect { 1 } else { 0 }, to)?;
|
||||||
|
|
@ -60,14 +60,14 @@ fn write_page<W: Write>(page: &Page<PageInfo>, to: &mut W) -> io::Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_page<R: Read>(from: &mut R) -> io::Result<Page<PageInfo>> {
|
pub fn read_page<R: Read>(from: &mut R) -> io::Result<Page<PageInfo>> {
|
||||||
let start = LinkIdx(read_u32(from)?);
|
let start_link_idx = read_u32(from)?;
|
||||||
let id = read_u32(from)?;
|
let id = read_u32(from)?;
|
||||||
let length = read_u32(from)?;
|
let length = read_u32(from)?;
|
||||||
let redirect = read_u8(from)? != 0;
|
let redirect = read_u8(from)? != 0;
|
||||||
let title = read_str(from)?;
|
let title = read_str(from)?;
|
||||||
|
|
||||||
Ok(Page {
|
Ok(Page {
|
||||||
start,
|
start: start_link_idx,
|
||||||
data: PageInfo {
|
data: PageInfo {
|
||||||
id,
|
id,
|
||||||
length,
|
length,
|
||||||
|
|
@ -78,7 +78,7 @@ pub fn read_page<R: Read>(from: &mut R) -> io::Result<Page<PageInfo>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_link<W: Write>(link: &Link<LinkInfo>, to: &mut W) -> io::Result<()> {
|
fn write_link<W: Write>(link: &Link<LinkInfo>, to: &mut W) -> io::Result<()> {
|
||||||
write_u32(link.to.0, to)?;
|
write_u32(link.to, to)?;
|
||||||
write_u32(link.data.start, to)?;
|
write_u32(link.data.start, to)?;
|
||||||
write_u32(link.data.len, to)?;
|
write_u32(link.data.len, to)?;
|
||||||
write_u8(link.data.flags, to)?;
|
write_u8(link.data.flags, to)?;
|
||||||
|
|
@ -87,13 +87,13 @@ fn write_link<W: Write>(link: &Link<LinkInfo>, to: &mut W) -> io::Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_link<R: Read>(from: &mut R) -> io::Result<Link<LinkInfo>> {
|
fn read_link<R: Read>(from: &mut R) -> io::Result<Link<LinkInfo>> {
|
||||||
let to = PageIdx(read_u32(from)?);
|
let to_page_idx = read_u32(from)?;
|
||||||
let start = read_u32(from)?;
|
let start = read_u32(from)?;
|
||||||
let len = read_u32(from)?;
|
let len = read_u32(from)?;
|
||||||
let flags = read_u8(from)?;
|
let flags = read_u8(from)?;
|
||||||
|
|
||||||
Ok(Link {
|
Ok(Link {
|
||||||
to,
|
to: to_page_idx,
|
||||||
data: LinkInfo { start, len, flags },
|
data: LinkInfo { start, len, flags },
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::data::{
|
use crate::data::{
|
||||||
adjacency_list::{AdjacencyList, Page, PageIdx},
|
adjacency_list::{AdjacencyList, Page},
|
||||||
info::{LinkInfo, PageInfo},
|
info::{LinkInfo, PageInfo},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -15,21 +15,17 @@ pub fn normalize_link(link: &str) -> String {
|
||||||
.collect::<String>()
|
.collect::<String>()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_index_of_title(pages: &[Page<PageInfo>], title: &str) -> PageIdx {
|
pub fn find_index_of_title(pages: &[Page<PageInfo>], title: &str) -> u32 {
|
||||||
let title = normalize_link(title);
|
let title = normalize_link(title);
|
||||||
let idx = pages
|
pages
|
||||||
.iter()
|
.iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.find(|(_, p)| normalize_link(&p.data.title) == title)
|
.find(|(_, p)| normalize_link(&p.data.title) == title)
|
||||||
.map(|(i, _)| i)
|
.map(|(i, _)| i)
|
||||||
.expect("invalid title") as u32;
|
.expect("invalid title") as u32
|
||||||
PageIdx(idx)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_redirects(
|
pub fn resolve_redirects(data: &AdjacencyList<PageInfo, LinkInfo>, mut page_idx: u32) -> u32 {
|
||||||
data: &AdjacencyList<PageInfo, LinkInfo>,
|
|
||||||
mut page_idx: PageIdx,
|
|
||||||
) -> PageIdx {
|
|
||||||
loop {
|
loop {
|
||||||
if data.page(page_idx).data.redirect {
|
if data.page(page_idx).data.redirect {
|
||||||
if let Some(link_idx) = data.link_redirect(page_idx) {
|
if let Some(link_idx) = data.link_redirect(page_idx) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue