Skip to content

Instantly share code, notes, and snippets.

@lovasoa
Last active October 25, 2019 09:29
Show Gist options
  • Save lovasoa/1cb72245e871a90b64aa19b7b07d2caa to your computer and use it in GitHub Desktop.
Save lovasoa/1cb72245e871a90b64aa19b7b07d2caa to your computer and use it in GitHub Desktop.
Improvement upon the code presented in "Rust and C++ on Floating-point Intensive Code" (https://www.reidatcheson.com/hpc/architecture/performance/rust/c++/2019/10/19/measure-cache.html)
/*
compile with:
rustc -C target-cpu=native -C opt-level=3 -C inline-threshold=1 -O mul_add.rs
*/
use std::env;
/// Number of independent partial accumulators used by the reduction.
///
/// The input arrays are walked in chunks of this many elements, and element
/// `k` of every chunk is accumulated into slot `k` of the partial-sum arrays,
/// so no single accumulator is updated twice in a row inside a chunk.
const CHUNKSIZE: usize = 32;

/// Parses the problem size (number of elements per array) from the first
/// command-line argument.
///
/// Returns an error message suitable for printing to the user when the
/// argument is missing, is not a non-negative integer, or is zero (an empty
/// problem would make the per-element timing a division by zero).
fn parse_problem_size(arg: Option<String>) -> Result<usize, String> {
    let raw = arg.ok_or_else(|| {
        String::from("usage: mul_add <n>  (n = number of f64 elements per array)")
    })?;
    let n = raw
        .parse::<usize>()
        .map_err(|err| format!("invalid element count {:?}: {}", raw, err))?;
    if n == 0 {
        return Err(String::from("element count must be greater than zero"));
    }
    Ok(n)
}

/// Builds the three input arrays `(a, b, c)` of length `n`.
///
/// `a[i] = |sin(i)| + 1e-5`, and `b[i] = c[i] = cos(i)`.
fn build_inputs(n: usize) -> (Vec<f64>, Vec<f64>, Vec<f64>) {
    let a: Vec<f64> = (0..n).map(|i| (i as f64).sin().abs() + 0.00001).collect();
    let b: Vec<f64> = (0..n).map(|i| (i as f64).cos()).collect();
    let c = b.clone();
    (a, b, c)
}

/// Computes the two reductions `(beta, r)` over the element-wise residual.
///
/// With `res = c - a*b` and `ares = a*res` (per element):
/// `beta = sum(ares * ares)` and `r = sum(res * ares)`.
///
/// Full chunks of `CHUNKSIZE` elements are accumulated into per-slot partial
/// sums; the leftover tail is accumulated into scalars; the partial sums are
/// then folded into the scalars.
///
/// # Panics
///
/// Panics if the three slices do not have the same length.
fn residual_sums(a: &[f64], b: &[f64], c: &[f64]) -> (f64, f64) {
    // Checked once up front: `zip` would otherwise silently truncate the
    // chunked part while the tails were taken from each slice independently.
    assert_eq!(a.len(), b.len(), "a and b must have the same length");
    assert_eq!(a.len(), c.len(), "a and c must have the same length");

    let a_chunks = a.chunks_exact(CHUNKSIZE);
    let b_chunks = b.chunks_exact(CHUNKSIZE);
    let c_chunks = c.chunks_exact(CHUNKSIZE);
    // The tails borrow from the underlying slices, so they stay valid after
    // the chunk iterators are consumed below.
    let a_tail = a_chunks.remainder();
    let b_tail = b_chunks.remainder();
    let c_tail = c_chunks.remainder();

    // Partial reductions, one slot per position within a chunk.
    let mut beta_lanes = [0.0_f64; CHUNKSIZE];
    let mut r_lanes = [0.0_f64; CHUNKSIZE];
    for ((a_chunk, b_chunk), c_chunk) in a_chunks.zip(b_chunks).zip(c_chunks) {
        let lanes = a_chunk
            .iter()
            .zip(b_chunk)
            .zip(c_chunk)
            .zip(beta_lanes.iter_mut())
            .zip(r_lanes.iter_mut());
        for ((((&ai, &bi), &ci), beta_i), r_i) in lanes {
            let res = ai.mul_add(-bi, ci);
            let ares = ai * res;
            *beta_i = ares.mul_add(ares, *beta_i);
            *r_i = res.mul_add(ares, *r_i);
        }
    }

    // Elements that did not fill a whole chunk.
    let mut beta = 0.0_f64;
    let mut r = 0.0_f64;
    for ((&ai, &bi), &ci) in a_tail.iter().zip(b_tail).zip(c_tail) {
        let res = ai.mul_add(-bi, ci);
        let ares = ai * res;
        beta = ares.mul_add(ares, beta);
        r = res.mul_add(ares, r);
    }

    // Fold the partial reductions into the final sums.
    beta += beta_lanes.iter().fold(0.0_f64, |acc, x| acc + x);
    r += r_lanes.iter().fold(0.0_f64, |acc, x| acc + x);
    (beta, r)
}

/// Updates `b` in place: `b[i] += rinvbeta * (c[i] - a[i]*b[i])`.
fn apply_correction(a: &[f64], b: &mut [f64], c: &[f64], rinvbeta: f64) {
    for ((&ai, bi), &ci) in a.iter().zip(b.iter_mut()).zip(c) {
        let tmp = bi.mul_add(-ai, ci);
        *bi = rinvbeta.mul_add(tmp, *bi);
    }
}

/// Performs one full iteration: computes `(beta, r)` from the current `b`
/// and then moves `b` along the residual by the step `r / beta`.
///
/// If `beta` is zero the step is not finite and `b` becomes non-finite; this
/// mirrors the arithmetic of the kernel and is not special-cased.
fn relaxation_step(a: &[f64], b: &mut [f64], c: &[f64]) {
    let (beta, r) = residual_sums(a, &*b, c);
    apply_correction(a, b, c, r / beta);
}

/// Benchmark driver.
///
/// Reads the array length from the first command-line argument, repeats
/// batches of `nruns` iterations until more than one second has elapsed,
/// then prints the time per element per iteration and the sum of `b`
/// (printed so the computation has an observable result).
///
/// Exits with status 2 and a message on stderr when the argument is missing
/// or invalid.
fn main() {
    use std::time::Instant;

    let n = match parse_problem_size(env::args().nth(1)) {
        Ok(n) => n,
        Err(message) => {
            eprintln!("{}", message);
            std::process::exit(2);
        }
    };
    let nruns = 100;
    let (a, mut b, c) = build_inputs(n);

    let mut count: usize = 0;
    let now = Instant::now();
    // The clock is only consulted between batches, so the total run time can
    // overshoot one second by up to one batch.
    while now.elapsed().as_secs_f64() <= 1.0 {
        count += 1;
        for _ in 0..nruns {
            relaxation_step(&a, &mut b, &c);
        }
    }

    println!(
        "Normalized Average time = {}",
        now.elapsed().as_secs_f64() / ((count as f64) * (n as f64) * (nruns as f64))
    );
    let sumb: f64 = b.iter().sum();
    println!("sumb={}", sumb);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment