Use simd-json

This commit is contained in:
Joscha 2022-09-30 18:07:50 +02:00
parent c195fbb8d4
commit b1f2af9577
4 changed files with 114 additions and 1 deletions

2
brood/.cargo/config.toml Normal file
View file

@ -0,0 +1,2 @@
[build]
rustflags = ["-C", "target-cpu=native"]

109
brood/Cargo.lock generated
View file

@ -2,6 +2,17 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "atty" name = "atty"
version = "0.2.14" version = "0.2.14"
@ -13,6 +24,12 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "1.3.2" version = "1.3.2"
@ -26,8 +43,15 @@ dependencies = [
"clap", "clap",
"serde", "serde",
"serde_json", "serde_json",
"simd-json",
] ]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.0.5" version = "4.0.5"
@ -65,6 +89,45 @@ dependencies = [
"os_str_bytes", "os_str_bytes",
] ]
[[package]]
name = "float-cmp"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4"
dependencies = [
"num-traits",
]
[[package]]
name = "getrandom"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "halfbrown"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce69ed202df415a3d4a01e6f3341320ca88b9bd4f0bf37be6fa239cdea06d9bf"
dependencies = [
"hashbrown",
"serde",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
]
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.4.0" version = "0.4.0"
@ -92,6 +155,15 @@ version = "0.2.134"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb" checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb"
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.15.0" version = "1.15.0"
@ -183,6 +255,25 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "simd-json"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bd78b840b9de64fa3f7d72909b76343849f68e8c3d32608db8d38e4e5481f84"
dependencies = [
"halfbrown",
"serde",
"serde_json",
"simdutf8",
"value-trait",
]
[[package]]
name = "simdutf8"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"
@ -215,12 +306,30 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]]
name = "value-trait"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0a635407649b66e125e4d2ffd208153210179f8c7c8b71c030aa2ad3eeb4c8f"
dependencies = [
"float-cmp",
"halfbrown",
"itoa",
"ryu",
]
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.4" version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.9" version = "0.3.9"

View file

@ -7,3 +7,4 @@ edition = "2021"
clap = { version = "4.0.5", features = ["derive"] } clap = { version = "4.0.5", features = ["derive"] }
serde = { version = "1.0.145", features = ["derive"] } serde = { version = "1.0.145", features = ["derive"] }
serde_json = "1.0.85" serde_json = "1.0.85"
simd-json = "0.6.0"

View file

@ -18,7 +18,8 @@ pub fn ingest() -> io::Result<()> {
let mut n_links = 0; let mut n_links = 0;
for line in stdin.lines() { for line in stdin.lines() {
let json_page = serde_json::from_str::<JsonPage>(&line?)?; // let json_page = serde_json::from_str::<JsonPage>(&line?)?;
let json_page = simd_json::serde::from_str::<JsonPage>(&mut line?).unwrap();
n_pages += 1; n_pages += 1;
n_links += json_page.links.len(); n_links += json_page.links.len();