Created
June 30, 2022 03:48
-
-
Save mooreniemi/fd26554f9b574899e35baac2b462d27f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cargo manifest for the `mem` crate: a benchmark binary comparing bincode
# decoding, ndarray dot products and memory-mapped reads.
[package] | |
name = "mem" | |
version = "0.1.0" | |
edition = "2021" | |
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |
[dependencies] | |
rayon = "1.5" | |
# NOTE(review): indicatif is not referenced by the main.rs shown with this
# manifest — possibly unused; confirm before removing.
indicatif = {version = "0.16", features = ["rayon"]} | |
ndarray = { version = "0.15", features = ["rayon", "serde", "blas"] } | |
# NOTE(review): `version = "*"` accepts any blas-src release, so builds are not
# reproducible; consider pinning it. TODO confirm which blas-src release pairs
# with the openblas-src version pinned on the next line.
blas-src = { version = "*", default-features = false, features = ["openblas"] } | |
openblas-src = { version = "0.6.1", default-features = false, features = ["cblas"] } | |
# NOTE(review): ndarray-npy, serde_bytes and rkyv are likewise not referenced
# by the main.rs shown with this manifest — verify whether they are needed.
ndarray-npy = "0.8" | |
ndarray-rand = "0.14" | |
memmap2 = "0.5" | |
bincode = "1.3" | |
serde_bytes = "0.11.6" | |
rkyv = { version = "0.7", features = ["validation"] } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate blas_src; | |
use rayon::prelude::*; | |
use memmap2::MmapMut; | |
use std::{fs::OpenOptions, io::Write, time::Instant}; | |
use ndarray::Array1; | |
use ndarray_rand::{rand_distr::Uniform, RandomExt}; | |
fn main() -> Result<(), &'static str> { | |
let d = 256; | |
let n = 10_000; | |
let x = Array1::random(d, Uniform::<f32>::new(0., 1.)); | |
let mut ys = Vec::new(); | |
let mut encoded_ys = Vec::new(); | |
let mut encoded_size = 0; | |
let mut encoded_size_total = 0; | |
for _i in 0..n { | |
let y = Array1::random(d, Uniform::<f32>::new(0., 1.)); | |
let encoded: Vec<u8> = bincode::serialize(&y).expect("ser"); | |
let s = encoded.len(); | |
encoded_ys.push(encoded); | |
encoded_size_total += s; | |
encoded_size = s; | |
ys.push(y); | |
} | |
println!( | |
"encoded_size: {}, encoded_size_total: {}", | |
encoded_size, encoded_size_total | |
); | |
let file = OpenOptions::new() | |
.read(true) | |
.write(true) | |
.create(true) | |
.open(&"/tmp/embeds") | |
.expect("opened file"); | |
file.set_len(encoded_size_total as u64) | |
.expect("set file len"); | |
let mut mmap = unsafe { MmapMut::map_mut(&file).expect("mmap'd file") }; | |
for (i, ey) in encoded_ys.iter().enumerate() { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
// dbg!(&start, &end); | |
(&mut mmap[start..end]) | |
.write_all(ey.as_slice()) | |
.expect("one entry to mmap"); | |
} | |
mmap.flush().expect("mmap flush"); | |
mmap.advise(memmap2::Advice::Sequential) | |
.expect("mmap advise"); | |
println!("in memory decode"); | |
let start = Instant::now(); | |
for (_i, ey) in encoded_ys.iter().enumerate() { | |
let _y: Array1<f32> = bincode::deserialize(&ey).expect("de"); | |
} | |
println!("took: {:?}", start.elapsed()); | |
println!("in memory dot"); | |
let start = Instant::now(); | |
for y in ys.clone() { | |
let _res: f32 = x.dot(&y); | |
} | |
println!("took: {:?}", start.elapsed()); | |
println!("in memmap (read only)"); | |
let start = Instant::now(); | |
for i in 0..n { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
let _b = &mmap[start..end]; | |
} | |
println!("took: {:?}", start.elapsed()); | |
println!("in memmap (read+decode)"); | |
let start = Instant::now(); | |
for i in 0..n { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
let _y: Array1<f32> = bincode::deserialize(&mmap[start..end]).expect("de"); | |
} | |
println!("took: {:?}", start.elapsed()); | |
println!("in memmap dot"); | |
let start = Instant::now(); | |
for i in 0..n { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
let y: Array1<f32> = bincode::deserialize(&mmap[start..end]).expect("de"); | |
let _res: f32 = x.dot(&y); | |
} | |
println!("took: {:?}", start.elapsed()); | |
let file = OpenOptions::new() | |
.read(true) | |
.write(true) | |
.create(true) | |
.open(&"/tmp/embeds_by_3") | |
.expect("opened file"); | |
let mut mmap = unsafe { MmapMut::map_mut(&file).expect("mmap'd file") }; | |
let mut chunked_ys = Vec::new(); | |
let mut encoded_size = 0; | |
let mut encoded_size_total = 0; | |
let mut total = 0; | |
for (_i, cy) in ys.chunks(2).enumerate() { | |
let ey: Vec<u8> = bincode::serialize(&cy).expect("ser"); | |
let s = ey.len(); | |
encoded_size = s; | |
encoded_size_total += s; | |
chunked_ys.push(ey); | |
total += 1; | |
} | |
file.set_len(encoded_size_total as u64) | |
.expect("set file len"); | |
for (i, cy) in chunked_ys.iter().enumerate() { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
// dbg!(&i, &start, &end); | |
(&mut mmap[start..end]) | |
.write_all(cy.as_slice()) | |
.expect("one chunked entry to mmap"); | |
} | |
mmap.flush().expect("mmap flush"); | |
mmap.advise(memmap2::Advice::Sequential) | |
.expect("mmap advise"); | |
println!("in chunked memmap read (only)"); | |
let start = Instant::now(); | |
// not n because we chunked | |
for i in 0..total { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
let _b = &mmap[start..end]; | |
} | |
println!("took: {:?}", start.elapsed()); | |
println!("in chunked memmap read+decode"); | |
let start = Instant::now(); | |
// not n because we chunked | |
for i in 0..total { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
let _ys: Vec<Array1<f32>> = bincode::deserialize(&mmap[start..end]).expect("de"); | |
} | |
println!("took: {:?}", start.elapsed()); | |
println!("in chunked memmap dot"); | |
let start = Instant::now(); | |
// not n because we chunked | |
for i in 0..total { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
let ys: Vec<Array1<f32>> = bincode::deserialize(&mmap[start..end]).expect("de"); | |
for y in ys { | |
let _res: f32 = x.dot(&y); | |
} | |
} | |
println!("took: {:?}", start.elapsed()); | |
println!("in par_iter chunked memmap dot"); | |
let start = Instant::now(); | |
// not n because we chunked | |
for i in 0..total { | |
let start = encoded_size * i; | |
let end = encoded_size * (i + 1); | |
let ys: Vec<Array1<f32>> = bincode::deserialize(&mmap[start..end]).expect("de"); | |
ys.into_par_iter().for_each(|y| { | |
let _res: f32 = x.dot(&y); | |
}); | |
} | |
println!("took: {:?}", start.elapsed()); | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To my surprise, in the naive benchmark above, the ordering by performance impact is bincode > dot > mmap.