lgarrison · September 29, 2019 20:50
diff --git a/corrfunc_bug_gh193.c b/corrfunc_bug_gh193.c
 /*

 This is a "minimal" reproducer for an apparent GCC-7 bug related to AVX-512.
 It has a 100% reproduction rate on my machine (Cascade Lake).

 A simple loop that copies the values from a double array to a float array
 sometimes gives the wrong answer in the first few elements (zero, nan, or garbage).

 The bug seems correlated with alignment values of the input and output
 arrays, but it's not a one-to-one relationship.

 There's a lot of "unnecessary" code below, but the bug is very sensitive to
 its presence (regardless of whether it is executed).

 Compile with:

 $ gcc-7 -mavx512f -O3 -fPIC corrfunc_bug_gh193.c -o corrfunc_bug_gh193

 The bug goes away with -fvect-cost-model=cheap, or when any of -mavx512f, -O3, or -fPIC is removed.
 It does not occur with -O2 or -mavx[2].

 The bug does not occur with GCC 6 or 8 for me, just 7. (Tested: 6.5, 8.3, 7.3, 7.4)

 This bug was uncovered as the root cause of a bug in the Corrfunc package.
 Original bug report here: https://github.com/manodeep/Corrfunc/issues/193

 When I run this code, I get the following output:

 $ ./corrfunc_bug_gh193
 loaded r (offset 32 from 64 bytes): 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
 loaded r_float (offset 32 from 64 bytes): 0.000000 0.748047 1.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
 corrfunc_bug_gh193: corrfunc_bug_gh193.c:103: check_bug: Assertion `r_float[i] == 1. && "Bug was triggered!"' failed.
 Aborted (core dumped)

 Author: Lehman Garrison (lgarrison.github.io)

 */

 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <inttypes.h>
 #include <assert.h>

 #define DOUBLE float

 /*
  * Allocate the input array for the reproducer and fill it with ones.
  *
  * rmax: output; set to 0.
  * n:    output; set to 20, the element count.
  * r:    output; receives a malloc'd buffer whose first *n doubles are 1.0.
  *
  * Always returns 0. The malloc result is not checked, and the buffer is
  * not freed in this function.
  */
 int setup_array(double *rmax,int *n, double **r)
 {
    *rmax = 0;
    *n = 20;  // 20 or larger seems to trigger the bug
    // NOTE(review): "+ 1" adds one byte, not one element; presumably
    // deliberate for the reproducer -- confirm before changing.
    *r = malloc(sizeof(double)* *n + 1);
    for(int i = 0; i < *n; i++)
        (*r)[i] = 1.;
    return 0;
 }

 /*
  * Reproducer body: convert a double array to DOUBLE (float) and verify it.
  *
  * Gets the input array from setup_array(), prints its address modulo 64
  * and its values to stderr, copies it element-wise into a variable-length
  * array of DOUBLE, prints that array the same way, and asserts that every
  * converted element equals 1. Prints "No bug." to stdout if all assertions
  * pass.
  *
  * verbose:            when non-zero, print xmin, xmax and xdiff to stderr.
  * binning_flags:      when non-zero, evaluate the pimax/zdiff comparison.
  * bin_refine_factors: set to 1 through the pointer if that comparison holds.
  *
  * Per the file header, the seemingly unnecessary code here is kept on
  * purpose: the miscompilation is sensitive to its presence.
  *
  * NOTE(review): r is never freed in this function.
  */
 void check_bug(int verbose, int binning_flags, int *bin_refine_factors)
 {
    double *r=NULL;
    int n ;
    double rmax;
    setup_array(&rmax,&n,&r);

    // Fixed bounds: both diffs below evaluate to -2.
    DOUBLE xmin, xmax, zmin, zmax;
    xmin = zmin = 1.f;
    xmax = zmax = -1.f;

    const DOUBLE xdiff = xmax-xmin;
    const DOUBLE zdiff = zmax-zmin;
    const DOUBLE pimax = (DOUBLE) rmax;
    if(verbose) {
        fprintf(stderr,"%f, %f, %f",xmin,xmax,xdiff);
    }
    if(binning_flags) {
        // zdiff is negative here, so this holds only for pimax below -0.1.
        if(pimax < 0.05*zdiff) {
            *bin_refine_factors = 1;
        }
    }

    // Print alignment offset from 64 and values
    // NOTE(review): the argument is uintptr_t (unsigned) but the format is
    // PRId64; PRIuPTR would match the type. Left as-is for the reproducer.
    fprintf(stderr, "loaded r (offset %" PRId64 " from 64 bytes): ", ((uintptr_t)(void *)r) % 64);
    for(int64_t i=0;i<n;i++) {
        fprintf(stderr, "%f ", r[i]);
    }
    fprintf(stderr, "\n");

    // Variable-length array; its size n is only known at run time.
    DOUBLE r_float[n];

    // THIS LOOP SOMETIMES GIVES BAD VALUES
    // Each assignment narrows a double to DOUBLE (float).
    for(int i=0; i < n; i++) {
        r_float[i] = r[i];
    }

    // Print alignment offset from 64 and values
    // NOTE(review): same PRId64 / uintptr_t mismatch as the print above.
    fprintf(stderr, "loaded r_float (offset %" PRId64 " from 64 bytes): ", ((uintptr_t)(void *)r_float) % 64);
    for(int64_t i=0;i<n;i++) {
        fprintf(stderr, "%f ", r_float[i]);
    }
    fprintf(stderr, "\n");

    // Check the answer: every converted element must still be exactly 1.
    for(int i = 0; i < n; i++)
        assert(r_float[i] == 1. && "Bug was triggered!");
    printf("No bug.\n");
 }

 /*
  * Entry point: run the reproducer once with verbose off and binning_flags on.
  * brf is only the destination for check_bug's bin_refine_factors output;
  * it is never read here.
  */
 int main(void){
    int brf;

    check_bug(0, 1, &brf);

    return 0;
 }
	/*

	This is a "minimal" reproducer for an apparent GCC-7 bug related to AVX-512.
	It has a 100% reproduction rate on my machine (Cascade Lake).

	A simple loop that copies the values from a double array to a float array
	sometimes gives the wrong answer in the first few elements (zero, nan, or garbage).

	The bug seems correlated with alignment values of the input and output
	arrays, but it's not a one-to-one relationship.

	There's a lot of "unnecessary" code below, but the bug is very sensitive to
	its presence (regardless of whether it is executed).

	Compile with:

	$ gcc-7 -mavx512f -O3 -fPIC corrfunc_bug_gh193.c -o corrfunc_bug_gh193

	The bug goes away with -fvect-cost-model=cheap, or when any of -mavx512f, -O3, or -fPIC is removed.
	It does not occur with -O2 or -mavx[2].

	The bug does not occur with GCC 6 or 8 for me, just 7. (Tested: 6.5, 8.3, 7.3, 7.4)

	This bug was uncovered as the root cause of a bug in the Corrfunc package.
	Original bug report here: https://github.com/manodeep/Corrfunc/issues/193

	When I run this code, I get the following output:

	$ ./corrfunc_bug_gh193
	loaded r (offset 32 from 64 bytes): 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
	loaded r_float (offset 32 from 64 bytes): 0.000000 0.748047 1.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
	corrfunc_bug_gh193: corrfunc_bug_gh193.c:103: check_bug: Assertion `r_float[i] == 1. && "Bug was triggered!"' failed.
	Aborted (core dumped)

	Author: Lehman Garrison (lgarrison.github.io)

	*/

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <inttypes.h>
	#include <assert.h>

	#define DOUBLE float

	/*
	 * Allocate the input array for the reproducer and fill it with ones.
	 *
	 * rmax: output; set to 0.
	 * n:    output; set to 20, the element count.
	 * r:    output; receives a malloc'd buffer whose first *n doubles are 1.0.
	 *
	 * Always returns 0. The malloc result is not checked, and the buffer is
	 * not freed in this function.
	 *
	 * The parameters are pointers because the caller passes &rmax, &n and &r;
	 * the by-value declarations previously here could not compile.
	 */
	int setup_array(double *rmax,int *n, double **r)
	{
	    *rmax = 0;
	    *n = 20;  // 20 or larger seems to trigger the bug
	    // NOTE(review): "+ 1" adds one byte, not one element; presumably
	    // deliberate for the reproducer -- confirm before changing.
	    *r = malloc(sizeof(double)* *n + 1);
	    for(int i = 0; i < *n; i++)
	        (*r)[i] = 1.;
	    return 0;
	}

	/*
	 * Reproducer body: convert a double array to DOUBLE (float) and verify it.
	 *
	 * Gets the input array from setup_array(), prints its address modulo 64
	 * and its values to stderr, copies it element-wise into a variable-length
	 * array of DOUBLE, prints that array the same way, and asserts that every
	 * converted element equals 1. Prints "No bug." to stdout if all assertions
	 * pass.
	 *
	 * verbose:            when non-zero, print xmin, xmax and xdiff to stderr.
	 * binning_flags:      when non-zero, evaluate the pimax/zdiff comparison.
	 * bin_refine_factors: set to 1 through the pointer if that comparison holds.
	 *
	 * Per the file header, the seemingly unnecessary code here is kept on
	 * purpose: the miscompilation is sensitive to its presence.
	 *
	 * NOTE(review): r is never freed in this function.
	 */
	void check_bug(int verbose, int binning_flags, int *bin_refine_factors)
	{
	double *r=NULL;
	int n ;
	double rmax;
	setup_array(&rmax,&n,&r);

	// Fixed bounds: both diffs below evaluate to -2.
	DOUBLE xmin, xmax, zmin, zmax;
	xmin = zmin = 1.f;
	xmax = zmax = -1.f;

	const DOUBLE xdiff = xmax-xmin;
	const DOUBLE zdiff = zmax-zmin;
	const DOUBLE pimax = (DOUBLE) rmax;
	if(verbose) {
	fprintf(stderr,"%f, %f, %f",xmin,xmax,xdiff);
	}
	if(binning_flags) {
	// zdiff is negative here, so this holds only for pimax below -0.1.
	if(pimax < 0.05*zdiff) {
	*bin_refine_factors = 1;
	}
	}

	// Print alignment offset from 64 and values
	// NOTE(review): the argument is uintptr_t (unsigned) but the format is
	// PRId64; PRIuPTR would match the type. Left as-is for the reproducer.
	fprintf(stderr, "loaded r (offset %" PRId64 " from 64 bytes): ", ((uintptr_t)(void *)r) % 64);
	for(int64_t i=0;i<n;i++) {
	fprintf(stderr, "%f ", r[i]);
	}
	fprintf(stderr, "\n");

	// Variable-length array; its size n is only known at run time.
	DOUBLE r_float[n];

	// THIS LOOP SOMETIMES GIVES BAD VALUES
	// Each assignment narrows a double to DOUBLE (float).
	for(int i=0; i < n; i++) {
	r_float[i] = r[i];
	}

	// Print alignment offset from 64 and values
	// NOTE(review): same PRId64 / uintptr_t mismatch as the print above.
	fprintf(stderr, "loaded r_float (offset %" PRId64 " from 64 bytes): ", ((uintptr_t)(void *)r_float) % 64);
	for(int64_t i=0;i<n;i++) {
	fprintf(stderr, "%f ", r_float[i]);
	}
	fprintf(stderr, "\n");

	// Check the answer: every converted element must still be exactly 1.
	for(int i = 0; i < n; i++)
	assert(r_float[i] == 1. && "Bug was triggered!");
	printf("No bug.\n");
	}

	/*
	 * Entry point: run the reproducer once with verbose off and binning_flags on.
	 * brf is only the destination for check_bug's bin_refine_factors output;
	 * it is never read here.
	 */
	int main(void){
	int brf;

	check_bug(0, 1, &brf);

	return 0;
	}