Try out a faster hash algorithm

This commit is contained in:
Joscha 2022-09-30 19:02:57 +02:00
parent 5e8589f73e
commit 11c4ff699f
3 changed files with 13 additions and 5 deletions

7
brood/Cargo.lock generated
View file

@@ -41,6 +41,7 @@ name = "brood"
version = "0.1.0"
dependencies = [
"clap",
"rustc-hash",
"serde",
"serde_json",
"simd-json",
@@ -218,6 +219,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "ryu"
version = "1.0.11"

View file

@@ -5,6 +5,7 @@ edition = "2021"
[dependencies]
clap = { version = "4.0.5", features = ["derive"] }
rustc-hash = "1.1.0"
serde = { version = "1.0.145", features = ["derive"] }
serde_json = "1.0.85"
simd-json = "0.6.0"

View file

@@ -1,7 +1,7 @@
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::io::{self, BufRead, BufReader};
use rustc_hash::FxHashMap;
use serde::Deserialize;
use crate::data::{Link, Page};
@@ -44,23 +44,23 @@ struct FirstStage {
/// The first entry with id 0 represents a nonexistent link.
pages: Vec<Page>,
/// Map from title to index in [`Self::pages`] (used during the second pass).
pages_map: HashMap<String, u32>,
pages_map: FxHashMap<String, u32>,
/// List with link info and index into [`Self::titles`].
links: Vec<Link>,
/// List with titles.
titles: Vec<String>,
/// Map from title to index in [`Self::titles`] (used during decoding).
titles_map: HashMap<String, u32>,
titles_map: FxHashMap<String, u32>,
}
impl FirstStage {
fn new() -> Self {
let mut result = Self {
pages: vec![],
pages_map: HashMap::new(),
pages_map: FxHashMap::default(),
links: vec![],
titles: vec![],
titles_map: HashMap::new(),
titles_map: FxHashMap::default(),
};
result.push_page(0, 0, "this link does not exist".to_string(), false);
result