lovasoa · October 25, 2019 09:29
diff --git a/mul_add.rs b/mul_add.rs
 /*
 compile with: 
 rustc -C target-cpu=native -C opt-level=3 -C inline-threshold=1 mul_add.rs
 */
 use std::env;
 /// Number of independent accumulator lanes used by the chunked reduction.
 const CHUNKSIZE: usize = 32;

 /// Parses the element count from the first command-line argument.
 ///
 /// Returns `None` when the argument is absent, is not a valid `usize`, or is
 /// zero (a zero-length run would make the normalized time divide by zero).
 fn parse_len(arg: Option<String>) -> Option<usize> {
     arg?.parse::<usize>().ok().filter(|&n| n > 0)
 }

 /// Computes the two reductions `(beta, r)` over element-wise triples.
 ///
 /// For every index, with `res = c - a * b` and `ares = a * res` (both formed
 /// with fused multiply-adds), this accumulates `beta = sum(ares * ares)` and
 /// `r = sum(res * ares)`. Callers are expected to pass slices of equal length,
 /// as `main` does.
 fn reduce_sums(a: &[f64], b: &[f64], c: &[f64]) -> (f64, f64) {
     // One partial sum per lane, so consecutive fused multiply-adds do not
     // depend on each other (presumably to let the inner loop vectorize).
     let mut beta_vec = [0.0_f64; CHUNKSIZE];
     let mut r_vec = [0.0_f64; CHUNKSIZE];

     let a_chunks = a.chunks_exact(CHUNKSIZE);
     let b_chunks = b.chunks_exact(CHUNKSIZE);
     let c_chunks = c.chunks_exact(CHUNKSIZE);
     // `remainder()` is the tail that does not fill a whole chunk; take it
     // before the chunk iterators are consumed by the loop below.
     let a_tail = a_chunks.remainder();
     let b_tail = b_chunks.remainder();
     let c_tail = c_chunks.remainder();

     // Full chunks: element `k` of every chunk feeds lane `k`.
     for ((avec, bvec), cvec) in a_chunks.zip(b_chunks).zip(c_chunks) {
         let lanes = avec
             .iter()
             .zip(bvec)
             .zip(cvec)
             .zip(beta_vec.iter_mut())
             .zip(r_vec.iter_mut());
         for ((((ai, bi), ci), betai), ri) in lanes {
             let res = ai.mul_add(-bi, *ci);
             let ares = ai * res;
             *betai = ares.mul_add(ares, *betai);
             *ri = res.mul_add(ares, *ri);
         }
     }

     // Tail elements are reduced sequentially into scalar accumulators.
     let mut beta = 0.0_f64;
     let mut r = 0.0_f64;
     for ((ai, bi), ci) in a_tail.iter().zip(b_tail).zip(c_tail) {
         let res = ai.mul_add(-bi, *ci);
         let ares = ai * res;
         beta = ares.mul_add(ares, beta);
         r = res.mul_add(ares, r);
     }

     // Fold the per-lane partial sums into the final totals.
     beta += beta_vec.iter().sum::<f64>();
     r += r_vec.iter().sum::<f64>();
     (beta, r)
 }

 /// Updates `b` in place: `b[i] = rinvbeta * (c[i] - a[i] * b[i]) + b[i]`,
 /// with both steps evaluated as fused multiply-adds.
 fn relax(a: &[f64], b: &mut [f64], c: &[f64], rinvbeta: f64) {
     for ((ai, bi), ci) in a.iter().zip(b.iter_mut()).zip(c) {
         let tmp = bi.mul_add(-ai, *ci);
         *bi = rinvbeta.mul_add(tmp, *bi);
     }
 }

 /// Benchmark driver.
 ///
 /// Reads the element count `n` from the first command-line argument, builds
 /// the three input arrays, then repeats batches of `nruns` reduce-and-update
 /// steps until at least one second has elapsed. Prints the elapsed time
 /// divided by the total number of processed elements, followed by the sum of
 /// `b` (which also keeps the computed values observable).
 ///
 /// Exits with status 2 and a usage message when `n` is missing, not a
 /// number, or zero.
 fn main() {
     use std::time::Instant;

     let n = match parse_len(env::args().nth(1)) {
         Some(n) => n,
         None => {
             eprintln!("usage: mul_add <n>  (n: number of elements, a positive integer)");
             std::process::exit(2);
         }
     };
     let nruns = 100;

     let a: Vec<f64> = (0..n).map(|i| (i as f64).sin().abs() + 0.00001).collect();
     let mut b: Vec<f64> = (0..n).map(|i| (i as f64).cos()).collect();
     // `c` starts out with the same values as `b`.
     let c = b.clone();

     let mut count: usize = 0;
     let now = Instant::now();
     while now.elapsed().as_secs_f64() <= 1.0 {
         count += 1;
         for _ in 0..nruns {
             let (beta, r) = reduce_sums(&a, &b, &c);
             let rinvbeta = r / beta;
             relax(&a, &mut b, &c, rinvbeta);
         }
     }
     println!(
         "Normalized Average time = {}",
         now.elapsed().as_secs_f64() / ((count as f64) * (n as f64) * (nruns as f64))
     );

     let sumb: f64 = b.iter().sum();
     println!("sumb={}", sumb);
 }
	/*
	compile with:
	rustc -C target-cpu=native -C opt-level=3 -C inline-threshold=1 mul_add.rs
	*/
	use std::env;
	/// Number of independent accumulator lanes used by the chunked reduction.
	const CHUNKSIZE: usize = 32;

	/// Parses the element count from the first command-line argument.
	///
	/// Returns `None` when the argument is absent, is not a valid `usize`, or is
	/// zero (a zero-length run would make the normalized time divide by zero).
	fn parse_len(arg: Option<String>) -> Option<usize> {
	    arg?.parse::<usize>().ok().filter(|&n| n > 0)
	}

	/// Computes the two reductions `(beta, r)` over element-wise triples.
	///
	/// For every index, with `res = c - a * b` and `ares = a * res` (both formed
	/// with fused multiply-adds), this accumulates `beta = sum(ares * ares)` and
	/// `r = sum(res * ares)`. Callers are expected to pass slices of equal length,
	/// as `main` does.
	fn reduce_sums(a: &[f64], b: &[f64], c: &[f64]) -> (f64, f64) {
	    // One partial sum per lane, so consecutive fused multiply-adds do not
	    // depend on each other (presumably to let the inner loop vectorize).
	    let mut beta_vec = [0.0_f64; CHUNKSIZE];
	    let mut r_vec = [0.0_f64; CHUNKSIZE];

	    let a_chunks = a.chunks_exact(CHUNKSIZE);
	    let b_chunks = b.chunks_exact(CHUNKSIZE);
	    let c_chunks = c.chunks_exact(CHUNKSIZE);
	    // `remainder()` is the tail that does not fill a whole chunk; take it
	    // before the chunk iterators are consumed by the loop below.
	    let a_tail = a_chunks.remainder();
	    let b_tail = b_chunks.remainder();
	    let c_tail = c_chunks.remainder();

	    // Full chunks: element `k` of every chunk feeds lane `k`.
	    for ((avec, bvec), cvec) in a_chunks.zip(b_chunks).zip(c_chunks) {
	        let lanes = avec
	            .iter()
	            .zip(bvec)
	            .zip(cvec)
	            .zip(beta_vec.iter_mut())
	            .zip(r_vec.iter_mut());
	        for ((((ai, bi), ci), betai), ri) in lanes {
	            let res = ai.mul_add(-bi, *ci);
	            let ares = ai * res;
	            // The `*` dereferences here were lost in the garbled rendering.
	            *betai = ares.mul_add(ares, *betai);
	            *ri = res.mul_add(ares, *ri);
	        }
	    }

	    // Tail elements are reduced sequentially into scalar accumulators.
	    let mut beta = 0.0_f64;
	    let mut r = 0.0_f64;
	    for ((ai, bi), ci) in a_tail.iter().zip(b_tail).zip(c_tail) {
	        let res = ai.mul_add(-bi, *ci);
	        let ares = ai * res;
	        beta = ares.mul_add(ares, beta);
	        r = res.mul_add(ares, r);
	    }

	    // Fold the per-lane partial sums into the final totals.
	    beta += beta_vec.iter().sum::<f64>();
	    r += r_vec.iter().sum::<f64>();
	    (beta, r)
	}

	/// Updates `b` in place: `b[i] = rinvbeta * (c[i] - a[i] * b[i]) + b[i]`,
	/// with both steps evaluated as fused multiply-adds.
	fn relax(a: &[f64], b: &mut [f64], c: &[f64], rinvbeta: f64) {
	    for ((ai, bi), ci) in a.iter().zip(b.iter_mut()).zip(c) {
	        let tmp = bi.mul_add(-ai, *ci);
	        *bi = rinvbeta.mul_add(tmp, *bi);
	    }
	}

	/// Benchmark driver.
	///
	/// Reads the element count `n` from the first command-line argument, builds
	/// the three input arrays, then repeats batches of `nruns` reduce-and-update
	/// steps until at least one second has elapsed. Prints the elapsed time
	/// divided by the total number of processed elements, followed by the sum of
	/// `b` (which also keeps the computed values observable).
	///
	/// Exits with status 2 and a usage message when `n` is missing, not a
	/// number, or zero.
	fn main() {
	    use std::time::Instant;

	    let n = match parse_len(env::args().nth(1)) {
	        Some(n) => n,
	        None => {
	            eprintln!("usage: mul_add <n>  (n: number of elements, a positive integer)");
	            std::process::exit(2);
	        }
	    };
	    let nruns = 100;

	    let a: Vec<f64> = (0..n).map(|i| (i as f64).sin().abs() + 0.00001).collect();
	    let mut b: Vec<f64> = (0..n).map(|i| (i as f64).cos()).collect();
	    // `c` starts out with the same values as `b`.
	    let c = b.clone();

	    let mut count: usize = 0;
	    let now = Instant::now();
	    while now.elapsed().as_secs_f64() <= 1.0 {
	        count += 1;
	        for _ in 0..nruns {
	            let (beta, r) = reduce_sums(&a, &b, &c);
	            let rinvbeta = r / beta;
	            relax(&a, &mut b, &c, rinvbeta);
	        }
	    }
	    // The three factors are multiplied; the `*` operators were lost in the
	    // garbled rendering.
	    println!(
	        "Normalized Average time = {}",
	        now.elapsed().as_secs_f64() / ((count as f64) * (n as f64) * (nruns as f64))
	    );

	    let sumb: f64 = b.iter().sum();
	    println!("sumb={}", sumb);
	}