From b1f2af9577acb623316092dfcf8a468689d18239 Mon Sep 17 00:00:00 2001 From: Joscha Date: Fri, 30 Sep 2022 18:07:50 +0200 Subject: [PATCH] Use simd-json --- brood/.cargo/config.toml | 2 + brood/Cargo.lock | 109 +++++++++++++++++++++++++++++++++++++++ brood/Cargo.toml | 1 + brood/src/ingest.rs | 3 +- 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 brood/.cargo/config.toml diff --git a/brood/.cargo/config.toml b/brood/.cargo/config.toml new file mode 100644 index 0000000..ddff440 --- /dev/null +++ b/brood/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-C", "target-cpu=native"] diff --git a/brood/Cargo.lock b/brood/Cargo.lock index 4eaac25..ae98952 100644 --- a/brood/Cargo.lock +++ b/brood/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "atty" version = "0.2.14" @@ -13,6 +24,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "bitflags" version = "1.3.2" @@ -26,8 +43,15 @@ dependencies = [ "clap", "serde", "serde_json", + "simd-json", ] +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "clap" version = "4.0.5" @@ -65,6 +89,45 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + +[[package]] +name = "getrandom" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "halfbrown" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce69ed202df415a3d4a01e6f3341320ca88b9bd4f0bf37be6fa239cdea06d9bf" +dependencies = [ + "hashbrown", + "serde", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + [[package]] name = "heck" version = "0.4.0" @@ -92,6 +155,15 @@ version = "0.2.134" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb" +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.15.0" @@ -183,6 +255,25 @@ dependencies = [ "serde", ] +[[package]] +name = "simd-json" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bd78b840b9de64fa3f7d72909b76343849f68e8c3d32608db8d38e4e5481f84" +dependencies = [ + "halfbrown", + "serde", + "serde_json", + "simdutf8", + "value-trait", +] + +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "strsim" version = "0.10.0" @@ -215,12 +306,30 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" +[[package]] +name = "value-trait" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0a635407649b66e125e4d2ffd208153210179f8c7c8b71c030aa2ad3eeb4c8f" +dependencies = [ + "float-cmp", + "halfbrown", + "itoa", + "ryu", +] + [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "winapi" version = "0.3.9" diff --git a/brood/Cargo.toml b/brood/Cargo.toml index 395ce7f..c0ba073 100644 --- a/brood/Cargo.toml +++ b/brood/Cargo.toml @@ -7,3 +7,4 @@ edition = "2021" clap = { version = "4.0.5", features = ["derive"] } serde = { version = "1.0.145", features = ["derive"] } serde_json = "1.0.85" +simd-json = "0.6.0" diff --git a/brood/src/ingest.rs b/brood/src/ingest.rs index f96f84a..80699bc 100644 --- a/brood/src/ingest.rs +++ b/brood/src/ingest.rs @@ -18,7 +18,8 @@ pub fn ingest() -> io::Result<()> { let mut n_links = 0; for line in stdin.lines() { - let json_page = serde_json::from_str::(&line?)?; + // let json_page = serde_json::from_str::(&line?)?; + let json_page = simd_json::serde::from_str::(&mut line?).unwrap(); n_pages += 1; n_links += json_page.links.len();