Created
July 6, 2015 02:51
-
-
Save kaityo256/f86d969faadead05a14f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <assert.h> | |
| #include <cuda.h> | |
| //---------------------------------------------------------------------- | |
| const int N = 32768; | |
| const double L = 10.0; | |
| const int D = 3; | |
| const int X = 0; | |
| const int Y = 1; | |
| const int Z = 2; | |
| double q[N][D],p[N][D]; | |
| const double dt = 0.001; | |
| const double C2 = 0.1; | |
| //---------------------------------------------------------------------- | |
| double | |
| myrand(void){ | |
| return static_cast<double>(rand())/(static_cast<double>(RAND_MAX)); | |
| } | |
| //---------------------------------------------------------------------- | |
| void | |
| calcforce(void){ | |
| for(int i=0;i<N-1;i++){ | |
| for(int j=i+1;j<N;j++){ | |
| double dx = q[j][X] - q[i][X]; | |
| double dy = q[j][Y] - q[i][Y]; | |
| double dz = q[j][Z] - q[i][Z]; | |
| double r2 = (dx*dx + dy*dy + dz*dz); | |
| double r6 = r2*r2*r2; | |
| double df = ((24.0*r6-48.0)/(r6*r6*r2)+C2*8.0)*dt; | |
| p[i][X] += df*dx; | |
| p[i][Y] += df*dy; | |
| p[i][Z] += df*dz; | |
| p[j][X] -= df*dx; | |
| p[j][Y] -= df*dy; | |
| p[j][Z] -= df*dz; | |
| } | |
| } | |
| } | |
| //---------------------------------------------------------------------- | |
| __global__ void | |
| calcforce_kernel_cuda(double *d_p, double *d_q){ | |
| int i = blockIdx.x * blockDim.x + threadIdx.x; | |
| for(int j=0;j<N;j++){ | |
| if (i==j)continue; | |
| double dx = d_q[j*3+X] - d_q[i*3+X]; | |
| double dy = d_q[j*3+Y] - d_q[i*3+Y]; | |
| double dz = d_q[j*3+Z] - d_q[i*3+Z]; | |
| double r2 = (dx*dx + dy*dy + dz*dz); | |
| double r6 = r2*r2*r2; | |
| double df = ((24.0*r6-48.0)/(r6*r6*r2)+C2*8.0)*dt; | |
| d_p[i*3+X] += df*dx; | |
| d_p[i*3+Y] += df*dy; | |
| d_p[i*3+Z] += df*dz; | |
| } | |
| } | |
| //---------------------------------------------------------------------- | |
| void | |
| calcforce_kernel(int i){ | |
| for(int j=0;j<N;j++){ | |
| if (i==j)continue; | |
| double dx = q[j][X] - q[i][X]; | |
| double dy = q[j][Y] - q[i][Y]; | |
| double dz = q[j][Z] - q[i][Z]; | |
| double r2 = (dx*dx + dy*dy + dz*dz); | |
| double r6 = r2*r2*r2; | |
| double df = ((24.0*r6-48.0)/(r6*r6*r2)+C2*8.0)*dt; | |
| p[i][X] += df*dx; | |
| p[i][Y] += df*dy; | |
| p[i][Z] += df*dz; | |
| } | |
| } | |
| //---------------------------------------------------------------------- | |
| void | |
| calcforce_cuda(void){ | |
| double *d_p, *d_q; | |
| size_t size = N*sizeof(double)*3; | |
| cudaMalloc((void**)&d_p,size); | |
| cudaMalloc((void**)&d_q,size); | |
| cudaMemcpy(d_p,p,size,cudaMemcpyHostToDevice); | |
| cudaMemcpy(d_q,q,size,cudaMemcpyHostToDevice); | |
| const int n_thread = 32; | |
| const int n_block = 1024; | |
| //const int n_thread = 512; | |
| //const int n_block = 64; | |
| assert(n_thread*n_block == N); | |
| calcforce_kernel_cuda<<<n_block,n_thread>>>(d_p,d_q); | |
| cudaMemcpy(p,d_p,size,cudaMemcpyDeviceToHost); | |
| cudaMemcpy(q,d_q,size,cudaMemcpyDeviceToHost); | |
| cudaThreadSynchronize(); | |
| cudaFree(d_p); | |
| cudaFree(d_q); | |
| } | |
| //---------------------------------------------------------------------- | |
| void | |
| calcforce_n2(void){ | |
| for(int i=0;i<N;i++){ | |
| for(int j=0;j<N;j++){ | |
| if (i==j)continue; | |
| double dx = q[j][X] - q[i][X]; | |
| double dy = q[j][Y] - q[i][Y]; | |
| double dz = q[j][Z] - q[i][Z]; | |
| double r2 = (dx*dx + dy*dy + dz*dz); | |
| double r6 = r2*r2*r2; | |
| double df = ((24.0*r6-48.0)/(r6*r6*r2)+C2*8.0)*dt; | |
| p[i][X] += df*dx; | |
| p[i][Y] += df*dy; | |
| p[i][Z] += df*dz; | |
| } | |
| } | |
| } | |
| //---------------------------------------------------------------------- | |
| int | |
| main(void){ | |
| cudaSetDeviceFlags(cudaDeviceMapHost); | |
| for(int i=0;i<N;i++){ | |
| p[i][X] = 1.0; | |
| p[i][Y] = 1.0; | |
| p[i][Z] = 1.0; | |
| q[i][X] = L*myrand(); | |
| q[i][Y] = L*myrand(); | |
| q[i][Z] = L*myrand(); | |
| } | |
| calcforce_cuda(); | |
| } | |
| //---------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment