Last active
February 8, 2016 12:55
-
-
Save taroyabuki/851e417dd187023f8468 to your computer and use it in GitHub Desktop.
(インテルCPUの)三角関数は不要 http://blog.unfindable.net/archives/8991
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build prerequisites on Ubuntu: install the CUDA toolkit and the Boost headers:
//   sudo apt-get install libboost-dev
// Building this code on Windows has not been worked out.
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>

#include <cuda_runtime.h>

#include <boost/multiprecision/cpp_dec_float.hpp>
| using namespace std; | |
| using namespace boost::multiprecision; | |
// High-precision reference type: Boost.Multiprecision decimal float
// declared with 30 digits; used to judge which double result is closer.
typedef number< cpp_dec_float<30> > decimal;
// Element-wise cosine: out[i] = cos(in[i]) for every i in [0, length).
//
// Expects a 1D launch; each thread handles the single element whose index
// is its flat global thread id. Threads whose id falls at or beyond
// `length` do nothing, so the grid may be larger than the data.
//
//   in     - input values, read-only
//   out    - receives the cosines
//   length - number of elements in both arrays
__global__ void mycos(const double *in, double *out, int length)
{
    const int tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (tid >= length) {
        return;  // surplus thread at the tail of the grid
    }
    const double angle = in[tid];
    out[tid] = cos(angle);
}
// Terminates the program with a diagnostic when a CUDA runtime call fails.
// Without this, a failed allocation/copy/launch would leave the host result
// buffer uninitialized and the comparison below would report garbage.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        cerr << what << " failed: " << cudaGetErrorString(status) << endl;
        exit(EXIT_FAILURE);
    }
}

// Returns cos(x) as computed by the x87 FCOS instruction.
// Requires an x86 host and a compiler that accepts GCC-style inline asm.
static double x87cos(double x)
{
    double result;
    // FCOS overwrites st(0) in place, so the input is tied to the output
    // register with the matching constraint "0" rather than being declared
    // as a separate stack operand.
    asm("fcos" : "=t" (result) : "0" (x));
    return result;
}

// Compares the CUDA device cos() against the x87 FCOS instruction on n random
// inputs in [0, 1]. For every input where the two doubles differ, a
// 30-digit multiprecision cos() is used as the reference to decide which
// result is closer; details are printed and a tally is reported at the end.
int main(void)
{
    srand((unsigned) time(NULL));

    int c1BetterCase = 0;
    int c2BetterCase = 0;

    const int n = 100;
    const size_t size = n * sizeof(double);

    double *in = (double *) malloc(size);
    double *out = (double *) malloc(size);
    if (in == NULL || out == NULL) {
        cerr << "host allocation failed" << endl;
        free(in);
        free(out);
        return EXIT_FAILURE;
    }

    double *d_in = NULL;
    checkCuda(cudaMalloc((void **) &d_in, size), "cudaMalloc(d_in)");
    double *d_out = NULL;
    checkCuda(cudaMalloc((void **) &d_out, size), "cudaMalloc(d_out)");

    for (int i = 0; i < n; ++i) {
        in[i] = rand() / (double) RAND_MAX;
    }

    checkCuda(cudaMemcpy(d_in, in, size, cudaMemcpyHostToDevice),
              "cudaMemcpy(host to device)");

    // Derive the grid from n so every element is covered even if n changes.
    const int threadsPerBlock = 128;
    const int blocks = (n + threadsPerBlock - 1) / threadsPerBlock;
    mycos<<<blocks, threadsPerBlock>>>(d_in, d_out, n);
    checkCuda(cudaGetLastError(), "mycos launch");

    // The blocking copy also surfaces any error raised while the kernel ran.
    checkCuda(cudaMemcpy(out, d_out, size, cudaMemcpyDeviceToHost),
              "cudaMemcpy(device to host)");

    for (int i = 0; i < n; ++i) {
        double x = in[i];
        double c1 = out[i];      // CUDA result
        double c2 = x87cos(x);   // x87 FCOS result
        decimal c = cos(decimal(x));
        if (c1 != c2) {
            decimal delta1 = abs(c - decimal(c1));
            decimal delta2 = abs(c - decimal(c2));
            // A tie is counted in favor of c2.
            if (delta1 < delta2) ++c1BetterCase;
            else ++c2BetterCase;
            cout << "x = " << setprecision(20) << x << endl <<
                "c1 = cuda cos(x) = " << c1 << endl <<
                "c2 = fcos(x) = " << c2 << endl <<
                "c = mp::cos(x) = " << c.str() << endl << setprecision(5) <<
                "abs(c - c1) = " << delta1 << endl <<
                "abs(c - c2) = " << delta2 << endl << endl;
        }
    }

    cout << "c1 is better: " << c1BetterCase << endl;
    cout << "c2 is better: " << c2BetterCase << endl;
    cout << "total: " << n << endl;

    checkCuda(cudaFree(d_in), "cudaFree(d_in)");
    checkCuda(cudaFree(d_out), "cudaFree(d_out)");
    free(in);
    free(out);
    checkCuda(cudaDeviceReset(), "cudaDeviceReset");
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment