Last active
December 9, 2015 19:24
-
-
Save sqamara/ca5a5af7e6da992e7e94 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/a.out |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath> | |
#include <cassert> | |
#include <iostream> | |
#include <limits> | |
#include <algorithm> // for max | |
#include "gammq.h" | |
// Gamma function and incomplete gamma function from mbpol | |
//////////////////////////////////////////////////////////////////////////////// | |
namespace { | |
//////////////////////////////////////////////////////////////////////////////// | |
const double EPS = std::numeric_limits<double>::epsilon(); | |
const double FPMIN = std::numeric_limits<double>::min()/EPS; | |
const int ngau = 18; | |
const double y[18] = {0.0021695375159141994, | |
0.011413521097787704,0.027972308950302116,0.051727015600492421, | |
0.082502225484340941, 0.12007019910960293,0.16415283300752470, | |
0.21442376986779355, 0.27051082840644336, 0.33199876341447887, | |
0.39843234186401943, 0.46931971407375483, 0.54413605556657973, | |
0.62232745288031077, 0.70331500465597174, 0.78649910768313447, | |
0.87126389619061517, 0.95698180152629142}; | |
const double w[18] = {0.0055657196642445571, | |
0.012915947284065419,0.020181515297735382,0.027298621498568734, | |
0.034213810770299537,0.040875750923643261,0.047235083490265582, | |
0.053244713977759692,0.058860144245324798,0.064039797355015485, | |
0.068745323835736408,0.072941885005653087,0.076598410645870640, | |
0.079687828912071670,0.082187266704339706,0.084078218979661945, | |
0.085346685739338721,0.085983275670394821}; | |
//////////////////////////////////////////////////////////////////////////////// | |
double gammpapprox(const double& a, const double& x, int psig) | |
{ | |
const double gln = ttm::gammln(a); | |
const double a1 = a-1.0; | |
const double lna1 = std::log(a1); | |
const double sqrta1 = std::sqrt(a1); | |
double xu, t, sum, ans; | |
if (x > a1) | |
xu = std::max(a1 + 11.5*sqrta1, x + 6.0*sqrta1); | |
else | |
xu = std::max(0.0, std::min(a1 - 7.5*sqrta1, x - 5.0*sqrta1)); | |
sum = 0.0; | |
for (int j = 0; j < ngau; ++j) { | |
t = x + (xu - x)*y[j]; | |
sum += w[j]*std::exp(-(t - a1) + a1*(std::log(t) - lna1)); | |
} | |
ans = sum*(xu - x)*std::exp(a1*(lna1 - 1.0) - gln); | |
return (psig ? (ans > 0.0 ? 1.0 - ans : -ans) | |
: (ans >= 0.0 ? ans : 1.0 + ans)); | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
double gser(const double& a, const double& x) | |
{ | |
const double gln = ttm::gammln(a); | |
double ap = a; | |
double sum = 1.0/a; | |
double del = sum; | |
for (;;) { | |
++ap; | |
del *= x/ap; | |
sum += del; | |
if (std::fabs(del) < std::fabs(sum)*EPS) { | |
return sum*std::exp(- x + a*std::log(x) - gln); | |
} | |
} | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
double gcf(const double& a, const double& x) | |
{ | |
const double gln = ttm::gammln(a); | |
double b = x + 1.0 - a; | |
double c = 1.0/FPMIN; | |
double d = 1.0/b; | |
double h = d; | |
for (int i = 1;; ++i) { | |
const double an = -i*(i - a); | |
b += 2.0; | |
d = an*d + b; | |
if (std::fabs(d) < FPMIN) | |
d = FPMIN; | |
c = b + an/c; | |
if (std::fabs(c) < FPMIN) | |
c = FPMIN; | |
d = 1.0/d; | |
const double del = d*c; | |
h *= del; | |
if (std::fabs(del - 1.0) <= EPS) | |
break; | |
} | |
return std::exp( - x + a*std::log(x) - gln)*h; | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
} // namespace | |
//////////////////////////////////////////////////////////////////////////////// | |
namespace ttm { | |
//////////////////////////////////////////////////////////////////////////////// | |
double gammq(const double& a, const double& x) | |
{ | |
const int ASWITCH = 100; | |
if (!(x >= 0.0 && a > 0.0)) { | |
std::cerr << "gammq: x = " << x << ", a = " << a << std::endl; | |
} | |
assert(x >= 0.0 && a > 0.0); | |
if (x == 0.0) | |
return 1.0; | |
else if (int(a) >= ASWITCH) | |
return gammpapprox(a, x, 0); | |
else if (x < a + 1.0) | |
return 1.0 - gser(a,x); | |
else | |
return gcf(a,x); | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
double gammln(const double& xx) | |
{ | |
static const double cof[14] = {57.1562356658629235,-59.5979603554754912, | |
14.1360979747417471,-0.491913816097620199,.339946499848118887e-4, | |
.465236289270485756e-4,-.983744753048795646e-4,.158088703224912494e-3, | |
-.210264441724104883e-3,.217439618115212643e-3,-.164318106536763890e-3, | |
.844182239838527433e-4,-.261908384015814087e-4,.368991826595316234e-5}; | |
assert(xx > 0.0); | |
double x = xx; | |
double y = xx; | |
double tmp = x + 5.24218750000000000; | |
tmp = (x + 0.5)*std::log(tmp) - tmp; | |
double ser = 0.999999999999997092; | |
for (int j = 0; j < 14; ++j) | |
ser += cof[j]/++y; | |
return tmp + std::log(2.5066282746310005*ser/x); | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
} // namespace ttm | |
//////////////////////////////////////////////////////////////////////////////// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath> | |
#include <cassert> | |
#include <iostream> | |
#include <limits> | |
#include <algorithm> // for max | |
#include "gammq_cuda.h" | |
#include <cstdio> | |
// Gamma function and incomplete gamma function from mbpol | |
//////////////////////////////////////////////////////////////////////////////// | |
namespace { | |
//////////////////////////////////////////////////////////////////////////////// | |
__device__ const double EPS = 2.2204460492503131E-16; //std::numeric_limits<double>::epsilon(); | |
__device__ const double FPMIN = 2.2250738585072014e-308/EPS; //std::numeric_limits<double>::min()/EPS; | |
const int ngau = 18; | |
__device__ const double y[18] = {0.0021695375159141994, | |
0.011413521097787704,0.027972308950302116,0.051727015600492421, | |
0.082502225484340941, 0.12007019910960293,0.16415283300752470, | |
0.21442376986779355, 0.27051082840644336, 0.33199876341447887, | |
0.39843234186401943, 0.46931971407375483, 0.54413605556657973, | |
0.62232745288031077, 0.70331500465597174, 0.78649910768313447, | |
0.87126389619061517, 0.95698180152629142}; | |
__device__ const double w[18] = {0.0055657196642445571, | |
0.012915947284065419,0.020181515297735382,0.027298621498568734, | |
0.034213810770299537,0.040875750923643261,0.047235083490265582, | |
0.053244713977759692,0.058860144245324798,0.064039797355015485, | |
0.068745323835736408,0.072941885005653087,0.076598410645870640, | |
0.079687828912071670,0.082187266704339706,0.084078218979661945, | |
0.085346685739338721,0.085983275670394821}; | |
//////////////////////////////////////////////////////////////////////////////// | |
extern "C" __device__ void gammln_device(const double& xx, double& retval){ | |
const double cof[14] = {57.1562356658629235,-59.5979603554754912, | |
14.1360979747417471,-0.491913816097620199,.339946499848118887e-4, | |
.465236289270485756e-4,-.983744753048795646e-4,.158088703224912494e-3, | |
-.210264441724104883e-3,.217439618115212643e-3,-.164318106536763890e-3, | |
.844182239838527433e-4,-.261908384015814087e-4,.368991826595316234e-5}; | |
assert(xx > 0.0); | |
double x = xx; | |
double y = xx; | |
double tmp = x + 5.24218750000000000; | |
tmp = (x + 0.5)*std::log(tmp) - tmp; | |
double ser = 0.999999999999997092; | |
for (int j = 0; j < 14; ++j) | |
ser += cof[j]/++y; | |
retval = tmp + std::log(2.5066282746310005*ser/x); | |
} | |
/////////////////////////////////////////////////////////////////////////////// | |
extern "C" __device__ double gammpapprox(const double& a, const double& x, int psig) | |
{ | |
double gln; | |
gammln_device(a, gln); | |
const double a1 = a-1.0; | |
const double lna1 = log(a1); | |
const double sqrta1 = sqrt(a1); | |
double xu, t, sum, ans; | |
if (x > a1) | |
xu = fmax(a1 + 11.5*sqrta1, x + 6.0*sqrta1); | |
else | |
xu = fmax(0.0, fmin(a1 - 7.5*sqrta1, x - 5.0*sqrta1)); | |
sum = 0.0; | |
for (int j = 0; j < ngau; ++j) { | |
t = x + (xu - x)*y[j]; | |
sum += w[j]*std::exp(-(t - a1) + a1*(std::log(t) - lna1)); | |
} | |
ans = sum*(xu - x)*std::exp(a1*(lna1 - 1.0) - gln); | |
return (psig ? (ans > 0.0 ? 1.0 - ans : -ans) | |
: (ans >= 0.0 ? ans : 1.0 + ans)); | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
extern "C" __device__ double gser(const double& a, const double& x) { | |
double gln; | |
gammln_device(a, gln); | |
double ap = a; | |
double sum = 1.0/a; | |
double del = sum; | |
for (;;) { | |
++ap; | |
del *= x/ap; | |
sum += del; | |
if (fabs(del) < fabs(sum)*EPS) { | |
return sum*exp(- x + a*log(x) - gln); | |
} | |
} | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
extern "C" __device__ double gcf(const double& a, const double& x) { | |
double gln; | |
gammln_device(a, gln); | |
double b = x + 1.0 - a; | |
double c = 1.0/FPMIN; | |
double d = 1.0/b; | |
double h = d; | |
for (int i = 1;; ++i) { | |
const double an = -i*(i - a); | |
b += 2.0; | |
d = an*d + b; | |
if (fabs(d) < FPMIN) | |
d = FPMIN; | |
c = b + an/c; | |
if (fabs(c) < FPMIN) | |
c = FPMIN; | |
d = 1.0/d; | |
const double del = d*c; | |
h *= del; | |
if (fabs(del - 1.0) <= EPS) | |
break; | |
} | |
return exp( - x + a*std::log(x) - gln)*h; | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
} // namespace | |
//////////////////////////////////////////////////////////////////////////////// | |
namespace ttm_cuda { | |
//////////////////////////////////////////////////////////////////////////////// | |
extern "C" __global__ void gammq(const double* __restrict__ a, const double* __restrict__ x, double* __restrict__ retval) { | |
const int ASWITCH = 100; | |
if (!(*x >= 0.0 && *a > 0.0)) { | |
printf("gammq: x = %lf, a = %lf\n", *x, *a); | |
} | |
assert(*x >= 0.0 && *a > 0.0); | |
if (*x == 0.0) | |
*retval = 1.0; | |
else if (int(*a) >= ASWITCH) | |
*retval = gammpapprox(*a, *x, 0); | |
else if (*x < *a + 1.0) | |
*retval = 1.0 - gser(*a,*x); | |
else | |
*retval = gcf(*a,*x); | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
extern "C" __global__ void gammln(const double* xx, double* retval){ | |
gammln_device(*xx, *retval); | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
void launch_gammq(const double* a, const double* x, double* retval) { | |
gammq<<<1,1>>>(a, x, retval); | |
cudaDeviceSynchronize(); | |
} | |
} // namespace ttm | |
//////////////////////////////////////////////////////////////////////////////// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef GAMMQ_H | |
#define GAMMQ_H | |
namespace ttm { | |
double gammq(const double&, const double&); | |
double gammln(const double&); | |
} // namespace ttm | |
#endif // GAMMQ_H | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef GAMMQCUDA_H | |
#define GAMMQCUDA_H | |
#include <cuda.h> | |
#include <cuda_runtime_api.h> | |
namespace ttm_cuda { | |
extern "C" __global__ void gammq(const double*, const double*, double*); | |
extern "C" __global__ void gammln(const double*, double*); | |
void launch_gammq(const double* a, const double* x, double* retval); | |
} // namespace ttm | |
#endif // GAMMQ_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio> | |
#include "gammq.h" | |
#include "gammq_cuda.h" | |
#include <cuda.h> | |
#include <cuda_runtime_api.h> | |
void compare_CPU_and_CUDA(double a, double x) { | |
printf("a = %lf, x = %lf\n", a, x); | |
printf("CPU gammq(%lf, %lf) = %lf\n", a, x, ttm::gammq(a, x)); | |
double result_cuda; | |
double *a_device, *x_device, *result_device; | |
cudaMalloc((void**)&a_device, sizeof(double)); | |
cudaMalloc((void**)&x_device, sizeof(double)); | |
cudaMalloc((void**)&result_device, sizeof(double)); | |
cudaMemcpy(a_device, &a, sizeof(double), cudaMemcpyHostToDevice); | |
cudaMemcpy(x_device, &x, sizeof(double), cudaMemcpyHostToDevice); | |
cudaMemcpy(result_device, &result_cuda, sizeof(double),cudaMemcpyHostToDevice); | |
ttm_cuda::launch_gammq(a_device, x_device, result_device); | |
cudaMemcpy(&result_cuda, result_device, sizeof(double), cudaMemcpyDeviceToHost); | |
cudaFree(a_device); | |
cudaFree(x_device); | |
cudaFree(result_device); | |
printf("CUDA gammq(%lf, %lf) = %lf\n\n", a, x, result_cuda); | |
} | |
void test_each_case() { | |
printf("Testing case if (x == 0.0)\n"); | |
compare_CPU_and_CUDA(1,0); | |
printf("Testing case else if (int(a) >= ASWITCH)\n"); | |
compare_CPU_and_CUDA(100,1); | |
printf("Testing case else if (x < a + 1.0)\n"); | |
compare_CPU_and_CUDA(1,1); | |
printf("Testing case else\n"); | |
compare_CPU_and_CUDA(1,2); | |
} | |
int main() { | |
test_each_case(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment