Last active
October 25, 2019 09:29
-
-
Save lovasoa/1cb72245e871a90b64aa19b7b07d2caa to your computer and use it in GitHub Desktop.
Improvement upon the code presented in "Rust and C++ on Floating-point Intensive Code" (https://www.reidatcheson.com/hpc/architecture/performance/rust/c++/2019/10/19/measure-cache.html)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
compile with:
rustc -C target-cpu=native -C opt-level=3 -C inline-threshold=1 mul_add.rs
*/
use std::env; | |
/// Benchmark driver.
///
/// Reads the array length `n` from the first command-line argument, fills
/// three `f64` vectors of that length, then repeats the reduction + update
/// kernel in batches of `nruns` passes until more than one second of wall
/// time has elapsed. Finally prints the elapsed time divided by
/// `count * n * nruns` and the sum of `b` (printing the sum also keeps the
/// computed values observable to the optimizer).
///
/// Exits with status 2 and a message on stderr when the argument is missing,
/// is not an unsigned integer, or is zero (zero elements would make both the
/// step factor and the normalized time a division by zero).
fn main() {
    use std::time::Instant;

    let n = match env::args().nth(1) {
        Some(arg) => match arg.parse::<usize>() {
            Ok(n) if n > 0 => n,
            _ => {
                eprintln!("error: <n> must be a positive integer, got {:?}", arg);
                std::process::exit(2)
            }
        },
        None => {
            eprintln!("usage: mul_add <n>");
            std::process::exit(2)
        }
    };
    let nruns = 100;

    // a is kept strictly positive; b and c start out identical.
    let a: Vec<f64> = (0..n).map(|i| (i as f64).sin().abs() + 0.00001).collect();
    let mut b: Vec<f64> = (0..n).map(|i| (i as f64).cos()).collect();
    let c: Vec<f64> = b.clone();

    let mut count: usize = 0;
    let now = Instant::now();
    while now.elapsed().as_secs_f64() <= 1.0 {
        count += 1;
        for _ in 0..nruns {
            let (beta, r) = weighted_residual_sums(&a, &b, &c);
            // NOTE(review): beta is a sum of squares; if every residual ever
            // became exactly zero this would be 0/0 — not guarded, to keep the
            // measured kernel unchanged.
            relax_b(&a, &mut b, &c, r / beta);
        }
    }

    println!(
        "Normalized Average time = {}",
        now.elapsed().as_secs_f64() / ((count as f64) * (n as f64) * (nruns as f64))
    );
    let sumb: f64 = b.iter().sum();
    println!("sumb={}", sumb);
}

/// Number of independent partial accumulators used by the chunked reduction.
const CHUNKSIZE: usize = 32;

/// Computes the two reductions of the kernel and returns them as `(beta, r)`.
///
/// With `res_i = c_i - a_i * b_i` and `ares_i = a_i * res_i`:
/// `beta = sum(ares_i * ares_i)` and `r = sum(res_i * ares_i)`.
///
/// Full chunks of `CHUNKSIZE` elements are accumulated lane-wise into
/// `CHUNKSIZE` partial sums; the leftover elements are accumulated into
/// scalars, and the lane sums are added on top of those at the end. The three
/// slices are expected to have the same length.
fn weighted_residual_sums(a: &[f64], b: &[f64], c: &[f64]) -> (f64, f64) {
    debug_assert_eq!(a.len(), b.len());
    debug_assert_eq!(a.len(), c.len());

    let mut beta_lanes = [0.0_f64; CHUNKSIZE];
    let mut r_lanes = [0.0_f64; CHUNKSIZE];

    let a_chunks = a.chunks_exact(CHUNKSIZE);
    let b_chunks = b.chunks_exact(CHUNKSIZE);
    let c_chunks = c.chunks_exact(CHUNKSIZE);
    // `remainder()` borrows from the underlying slice, not from the iterator,
    // so the tail can be captured before the chunk iterators are consumed.
    let tail = a_chunks
        .remainder()
        .iter()
        .zip(b_chunks.remainder())
        .zip(c_chunks.remainder());

    // Lane-wise partial reductions over all full chunks.
    for ((a_chunk, b_chunk), c_chunk) in a_chunks.zip(b_chunks).zip(c_chunks) {
        let lanes = a_chunk
            .iter()
            .zip(b_chunk)
            .zip(c_chunk)
            .zip(beta_lanes.iter_mut())
            .zip(r_lanes.iter_mut());
        for ((((&ai, &bi), &ci), beta_lane), r_lane) in lanes {
            let res = ai.mul_add(-bi, ci);
            let ares = ai * res;
            *beta_lane = ares.mul_add(ares, *beta_lane);
            *r_lane = res.mul_add(ares, *r_lane);
        }
    }

    // Scalar reduction over the elements that did not fill a whole chunk.
    let mut beta = 0.0_f64;
    let mut r = 0.0_f64;
    for ((&ai, &bi), &ci) in tail {
        let res = ai.mul_add(-bi, ci);
        let ares = ai * res;
        beta = ares.mul_add(ares, beta);
        r = res.mul_add(ares, r);
    }

    // Fold the lane sums into the final result.
    beta += beta_lanes.iter().sum::<f64>();
    r += r_lanes.iter().sum::<f64>();
    (beta, r)
}

/// Updates `b` in place: `b_i <- b_i + step * (c_i - a_i * b_i)`.
///
/// Both the residual and the update are evaluated with fused multiply-adds.
fn relax_b(a: &[f64], b: &mut [f64], c: &[f64], step: f64) {
    for ((&ai, bi), &ci) in a.iter().zip(b.iter_mut()).zip(c) {
        let tmp = bi.mul_add(-ai, ci);
        *bi = step.mul_add(tmp, *bi);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment