Speed up ingest using rustc_hash
An enwiki ingest went from ca. 6:50 minutes down to ca. 7:00 minutes. Oh wait... This was not a rigorous test, but rustc_hash doesn't seem to have a significant positive impact. Maybe I'm just holding it wrong, but right now I'd rather remove it again and have simpler code/deps.
This commit is contained in:
parent
eb631250d7
commit
e04215802e
1 changed file with 5 additions and 4 deletions
|
|
@@ -1,10 +1,11 @@
|
||||||
use std::{
|
use std::{
|
||||||
collections::{hash_map::Entry, HashMap},
|
collections::hash_map::Entry,
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{self, BufRead, BufReader, Seek},
|
io::{self, BufRead, BufReader, Seek},
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use rustc_hash::FxHashMap;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use thousands::Separable;
|
use thousands::Separable;
|
||||||
|
|
||||||
|
|
@@ -48,9 +49,9 @@ fn read_titles(r: &mut BufReader<File>) -> io::Result<Vec<String>> {
|
||||||
fn compute_title_lookup(
|
fn compute_title_lookup(
|
||||||
normalizer: &TitleNormalizer,
|
normalizer: &TitleNormalizer,
|
||||||
titles: &[String],
|
titles: &[String],
|
||||||
) -> HashMap<String, (u32, u32)> {
|
) -> FxHashMap<String, (u32, u32)> {
|
||||||
let mut counter = Counter::new();
|
let mut counter = Counter::new();
|
||||||
let mut title_lookup = HashMap::<String, (u32, u32)>::new();
|
let mut title_lookup = FxHashMap::<String, (u32, u32)>::default();
|
||||||
|
|
||||||
for (sift_i, title) in titles.iter().enumerate() {
|
for (sift_i, title) in titles.iter().enumerate() {
|
||||||
counter.tick();
|
counter.tick();
|
||||||
|
|
@@ -85,7 +86,7 @@ fn compute_title_lookup(
|
||||||
|
|
||||||
fn read_page_data(
|
fn read_page_data(
|
||||||
normalizer: &TitleNormalizer,
|
normalizer: &TitleNormalizer,
|
||||||
title_lookup: &HashMap<String, (u32, u32)>,
|
title_lookup: &FxHashMap<String, (u32, u32)>,
|
||||||
r: &mut BufReader<File>,
|
r: &mut BufReader<File>,
|
||||||
) -> io::Result<(Vec<Page>, Vec<Link>, Graph)> {
|
) -> io::Result<(Vec<Page>, Vec<Link>, Graph)> {
|
||||||
let mut counter = Counter::new();
|
let mut counter = Counter::new();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue