Created
September 29, 2019 20:50
-
-
Save lgarrison/5ebb747d3f1126319fd3744f925298e3 to your computer and use it in GitHub Desktop.
GCC Bug (Corrfunc #193)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
This is a "minimal" reproducer for an apparent GCC-7 bug related to AVX-512.
It has a 100% reproduction rate on my machine (Cascade Lake).
A simple loop that copies the values from a double array to a float array
sometimes gives the wrong answer in the first few elements (zero, nan, or garbage).
The bug seems correlated with alignment values of the input and output
arrays, but it's not a one-to-one relationship.
There's a lot of "unnecessary" code below, but the bug is very sensitive to
its presence (regardless of whether it is executed).
Compile with:
$ gcc-7 -mavx512f -O3 -fPIC corrfunc_bug_gh193.c -o corrfunc_bug_gh193
The bug goes away with -fvect-cost-model=cheap, or with removing any of -mavx512f, -O3, or -fPIC.
It does not occur with -O2 or -mavx[2].
The bug does not occur with GCC 6 or 8 for me, just 7. (Tested: 6.5, 8.3, 7.3, 7.4)
This bug was uncovered as the root cause of a bug in the Corrfunc package.
Original bug report here: https://github.com/manodeep/Corrfunc/issues/193
When I run this code, I get the following output:
$ ./corrfunc_bug_gh193
loaded r (offset 32 from 64 bytes): 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
loaded r_float (offset 32 from 64 bytes): 0.000000 0.748047 1.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
corrfunc_bug_gh193: corrfunc_bug_gh193.c:103: check_bug: Assertion `r_float[i] == 1. && "Bug was triggered!"' failed.
Aborted (core dumped)
Author: Lehman Garrison (lgarrison.github.io)
*/
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <inttypes.h> | |
#include <assert.h> | |
/* Element type of the narrowed array: the reproducer copies double values into DOUBLE. */
#define DOUBLE float
/*
 * Allocate and fill the input array used by the reproducer.
 *
 * Everything is returned through the pointer arguments:
 *   rmax - set to 0
 *   n    - set to 20, the number of elements
 *   r    - set to a freshly malloc'ed array of *n doubles, each equal to 1.0;
 *          the caller owns the array and is responsible for free()ing it
 *
 * Returns 0 on success, or -1 if the allocation failed (in which case *r is
 * NULL and must not be dereferenced).
 */
int setup_array(double *rmax,int *n, double **r)
{
    *rmax = 0;
    *n = 20; // 20 or larger seems to trigger the bug

    // The single spare byte past the n doubles is kept exactly as in the
    // original reproducer; it is never read or written.
    *r = malloc(sizeof(double)* *n + 1);
    if(*r == NULL) {
        return -1;
    }

    for(int i = 0; i < *n; i++) {
        (*r)[i] = 1.;
    }
    return 0;
}
void check_bug(int verbose, int binning_flags, int *bin_refine_factors) | |
{ | |
double *r=NULL; | |
int n ; | |
double rmax; | |
setup_array(&rmax,&n,&r); | |
DOUBLE xmin, xmax, zmin, zmax; | |
xmin = zmin = 1.f; | |
xmax = zmax = -1.f; | |
const DOUBLE xdiff = xmax-xmin; | |
const DOUBLE zdiff = zmax-zmin; | |
const DOUBLE pimax = (DOUBLE) rmax; | |
if(verbose) { | |
fprintf(stderr,"%f, %f, %f",xmin,xmax,xdiff); | |
} | |
if(binning_flags) { | |
if(pimax < 0.05*zdiff) { | |
*bin_refine_factors = 1; | |
} | |
} | |
// Print alignment offset from 64 and values | |
fprintf(stderr, "loaded r (offset %" PRId64 " from 64 bytes): ", ((uintptr_t)(void *)r) % 64); | |
for(int64_t i=0;i<n;i++) { | |
fprintf(stderr, "%f ", r[i]); | |
} | |
fprintf(stderr, "\n"); | |
DOUBLE r_float[n]; | |
// THIS LOOP SOMETIMES GIVES BAD VALUES | |
for(int i=0; i < n; i++) { | |
r_float[i] = r[i]; | |
} | |
// Print alignment offset from 64 and values | |
fprintf(stderr, "loaded r_float (offset %" PRId64 " from 64 bytes): ", ((uintptr_t)(void *)r_float) % 64); | |
for(int64_t i=0;i<n;i++) { | |
fprintf(stderr, "%f ", r_float[i]); | |
} | |
fprintf(stderr, "\n"); | |
// Check the answer | |
for(int i = 0; i < n; i++) | |
assert(r_float[i] == 1. && "Bug was triggered!"); | |
printf("No bug.\n"); | |
} | |
/*
 * Entry point: run the reproducer once with verbose output disabled and
 * binning_flags set to 1. The value written to brf is not used afterwards.
 */
int main(void){
    int brf = 0; // initialised so it never holds an indeterminate value
    check_bug(0, 1, &brf);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment