Last active
February 3, 2023 15:08
-
-
Save YuriGor/5e0dc379a340e5d13b53c5501892ab14 to your computer and use it in GitHub Desktop.
Vec vs HashMap read performance (ObjectId keys + rustc_hash)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "indexed_hash_map" | |
version = "0.1.0" | |
edition = "2021" | |
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |
[dependencies] | |
mongodb = "2.3.0" | |
rustc-hash = "1.1.0" | |
rand = "0.8.5" | |
criterion = "0.4.0" | |
[profile.dev] | |
opt-level = 0 | |
[profile.release] | |
opt-level = 3 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use core::cell::RefCell; | |
use mongodb::bson::oid; | |
use rand::prelude::*; | |
use rustc_hash::FxHashMap; | |
use std::rc::Rc; | |
// use std::hint::black_box; | |
use criterion::black_box; | |
use std::time::Instant; | |
pub struct Item { | |
pub id: oid::ObjectId, | |
pub idx: usize, | |
pub out_id: Vec<oid::ObjectId>, | |
pub out_idx: Vec<usize>, | |
} | |
pub struct Edge { | |
pub id: oid::ObjectId, | |
pub idx: usize, | |
pub from_id: oid::ObjectId, | |
pub from_idx: usize, | |
pub to_id: oid::ObjectId, | |
pub to_idx: usize, | |
} | |
pub struct ItemRef { | |
pub id: oid::ObjectId, | |
pub idx: usize, | |
pub out: Vec<Rc<RefCell<EdgeRef>>>, | |
} | |
pub struct EdgeRef { | |
pub id: oid::ObjectId, | |
pub from_idx: usize, | |
pub from: Rc<RefCell<ItemRef>>, | |
pub to_idx: usize, | |
pub to: Rc<RefCell<ItemRef>>, | |
} | |
const TOTAL: usize = 3000; | |
const NODE_EDGES: usize = 3; | |
const READS: usize = 1 + NODE_EDGES * 2; | |
fn main() { | |
hash(); | |
vec(); | |
cell(); | |
} | |
fn hash() { | |
let mut item_map = FxHashMap::default(); // HashMap with rustc_hash, should be fast? | |
let mut edge_map = FxHashMap::default(); // HashMap with rustc_hash, should be fast? | |
let mut item_keys = Vec::new(); // remember used keys | |
let mut rng = rand::thread_rng(); | |
for idx in 0..TOTAL { | |
// generate 1K records same way | |
let item = Item { | |
id: oid::ObjectId::new(), | |
idx, | |
out_idx: Vec::new(), | |
out_id: Vec::new(), | |
}; | |
item_keys.push(item.id); // push rec to map this time | |
item_map.insert(item.id, item); // remember key used | |
} | |
let mut ridx = 0; | |
for idx in 0..TOTAL { | |
// generate total random edges | |
for _ in 0..NODE_EDGES { | |
let from_idx = idx; | |
let to_idx = rng.gen_range(0..TOTAL); | |
let from_id = &item_keys[from_idx]; | |
let to_id = &item_keys[to_idx]; | |
let edge = Edge { | |
id: oid::ObjectId::new(), // new random UUID | |
idx: ridx, // index | |
from_idx: item_map[from_id].idx, | |
to_idx: item_map[to_id].idx, | |
from_id: item_map[from_id].id, | |
to_id: item_map[to_id].id, | |
}; | |
let from = item_map.get_mut(from_id).unwrap(); | |
from.out_id.push(edge.id); | |
from.out_idx.push(edge.idx); | |
edge_map.insert(edge.id, edge); | |
ridx += 1; | |
} | |
} | |
let now = Instant::now(); | |
for _ in 0..TOTAL { | |
// total runs x total records = 1M reads | |
for idx in &item_keys { | |
let item = &item_map[&idx]; | |
for ri in 0..NODE_EDGES { | |
black_box(&item_map[&edge_map[&item.out_id[ri]].to_id]); // just to not let compiler skip this code | |
} | |
} | |
} | |
let elapsed = now.elapsed(); | |
println!( | |
"{:.2?}M HashMap (rustc_hash) read time: {:.2?}", | |
READS * TOTAL * TOTAL / 1000000, | |
elapsed | |
); | |
} | |
fn vec() { | |
let mut item_vec = Vec::new(); // items registry | |
let mut edge_vec = Vec::new(); // items registry | |
let mut item_indexes = Vec::new(); // list of used item indexes (just to iterate over Vec of key when benching same as for HashMap) | |
let mut rng = rand::thread_rng(); | |
for idx in 0..TOTAL { | |
// generate total records | |
let item = Item { | |
id: oid::ObjectId::new(), // new random UUID | |
idx, // index | |
out_id: Vec::new(), | |
out_idx: Vec::new(), | |
}; | |
item_vec.push(item); //push to Vec as main storage | |
item_indexes.push(idx); // remember indexes used | |
} | |
let mut ridx = 0; | |
for idx in 0..TOTAL { | |
// generate total random edges | |
for _ in 0..NODE_EDGES { | |
let from_idx = idx; | |
let to_idx = rng.gen_range(0..TOTAL); | |
let edge = Edge { | |
id: oid::ObjectId::new(), // new random UUID | |
idx: ridx, // index | |
from_idx, | |
to_idx, | |
from_id: item_vec[from_idx].id, | |
to_id: item_vec[to_idx].id, | |
}; | |
item_vec[from_idx].out_id.push(edge.id); | |
item_vec[from_idx].out_idx.push(edge.idx); | |
edge_vec.push(edge); | |
ridx += 1; | |
} | |
} | |
let now = Instant::now(); | |
item_indexes.shuffle(&mut rng); | |
for _ in 0..TOTAL { | |
// total runs x total records = 1M reads | |
for idx in &item_indexes { | |
let item = &item_vec[*idx]; // noob here, not sure if doing nice here with ref/deref | |
for ri in 0..NODE_EDGES { | |
black_box(&item_vec[edge_vec[item.out_idx[ri]].to_idx]); // just to not let compiler skip this code | |
} | |
} | |
} | |
let elapsed = now.elapsed(); | |
println!( | |
"{:.2?}M Vec read time: {:.2?}", | |
READS * TOTAL * TOTAL / 1000000, | |
elapsed | |
); | |
} | |
fn cell() { | |
let mut item_vec = Vec::new(); | |
let mut edge_vec = Vec::new(); // HashMap with rustc_hash, should be fast? | |
let mut item_indexes = Vec::new(); // remember used keys | |
let mut rng = rand::thread_rng(); | |
for idx in 0..TOTAL { | |
// generate 1K records same way | |
let item = Rc::new(RefCell::new(ItemRef { | |
id: oid::ObjectId::new(), | |
idx, | |
out: Vec::new(), | |
})); | |
item_indexes.push(item.borrow().idx); // push rec to map this time | |
item_vec.push(item); // remember key used | |
} | |
for idx in 0..TOTAL { | |
// generate total random edges | |
for _ in 0..NODE_EDGES { | |
let from_idx = idx; | |
let to_idx = rng.gen_range(0..TOTAL); | |
let edge = Rc::new(RefCell::new(EdgeRef { | |
id: oid::ObjectId::new(), // new random UUID | |
from_idx, | |
to_idx, | |
from: item_vec[from_idx].clone(), | |
to: item_vec[to_idx].clone(), | |
})); | |
edge.borrow_mut().from.borrow_mut().out.push(edge.clone()); | |
edge_vec.push(edge); | |
} | |
} | |
let now = Instant::now(); | |
for _ in 0..TOTAL { | |
// total runs x total records = 1M reads | |
for idx in &item_indexes { | |
let item = &item_vec[*idx]; | |
for ri in 0..NODE_EDGES { | |
black_box(&item.borrow().out[ri].borrow().to); // just to not let compiler skip this code | |
} | |
} | |
} | |
let elapsed = now.elapsed(); | |
println!( | |
"{:.2?}M Vec/Rc/RefCell read time: {:.2?}", | |
READS * TOTAL * TOTAL / 1000000, | |
elapsed | |
); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
63M HashMap (rustc_hash) read time: 22,645s | |
63M Vec read time: 103,53ms | |
63M Vec/Rc/RefCell read time: 65.88ms |
This exact case is ultimately simple and isolated, so my approach is good enough.
That's bold claim.
You can't guarante that it would give same results on each execution.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I've uploaded this as example
https://github.com/Mart-Bogdan-TMP/html-parser-benchmark/blob/main/rust-parsers/benches/my_benchmark.rs
But I don't know perhaps it's to complex example. You could try examples from documtation of criterion from link of my original comment