Skip to content

Instantly share code, notes, and snippets.

@kaityo256
Created July 6, 2015 03:02
Show Gist options
  • Save kaityo256/282ff99fd5f0dfe9825a to your computer and use it in GitHub Desktop.
Save kaityo256/282ff99fd5f0dfe9825a to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <cuda.h>
//----------------------------------------------------------------------
const int N = 32768;
const double L = 10.0;
const int D = 3;
const int X = 0;
const int Y = 1;
const int Z = 2;
double q[N][D],p[N][D];
const double dt = 0.001;
const double C2 = 0.1;
//----------------------------------------------------------------------
double
myrand(void){
return static_cast<double>(rand())/(static_cast<double>(RAND_MAX));
}
//----------------------------------------------------------------------
void
calcforce(void){
for(int i=0;i<N-1;i++){
for(int j=i+1;j<N;j++){
double dx = q[j][X] - q[i][X];
double dy = q[j][Y] - q[i][Y];
double dz = q[j][Z] - q[i][Z];
double r2 = (dx*dx + dy*dy + dz*dz);
double r6 = r2*r2*r2;
double df = ((24.0*r6-48.0)/(r6*r6*r2)+C2*8.0)*dt;
p[i][X] += df*dx;
p[i][Y] += df*dy;
p[i][Z] += df*dz;
p[j][X] -= df*dx;
p[j][Y] -= df*dy;
p[j][Z] -= df*dz;
}
}
}
//----------------------------------------------------------------------
__global__ void
calcforce_kernel_cuda(double *d_p, double *d_q){
int i = blockIdx.x * blockDim.x + threadIdx.x;
for(int j=0;j<N;j++){
//if (i==j)continue;
double dx = d_q[j*3+X] - d_q[i*3+X];
double dy = d_q[j*3+Y] - d_q[i*3+Y];
double dz = d_q[j*3+Z] - d_q[i*3+Z];
double r2 = (dx*dx + dy*dy + dz*dz);
double r6 = r2*r2*r2;
double df = ((24.0*r6-48.0)/(r6*r6*r2)+C2*8.0)*dt;
if (i==j) df=0.0;
d_p[i*3+X] += df*dx;
d_p[i*3+Y] += df*dy;
d_p[i*3+Z] += df*dz;
}
}
//----------------------------------------------------------------------
void
calcforce_kernel(int i){
for(int j=0;j<N;j++){
if (i==j)continue;
double dx = q[j][X] - q[i][X];
double dy = q[j][Y] - q[i][Y];
double dz = q[j][Z] - q[i][Z];
double r2 = (dx*dx + dy*dy + dz*dz);
double r6 = r2*r2*r2;
double df = ((24.0*r6-48.0)/(r6*r6*r2)+C2*8.0)*dt;
p[i][X] += df*dx;
p[i][Y] += df*dy;
p[i][Z] += df*dz;
}
}
//----------------------------------------------------------------------
void
calcforce_cuda(void){
double *d_p, *d_q;
size_t size = N*sizeof(double)*3;
cudaMalloc((void**)&d_p,size);
cudaMalloc((void**)&d_q,size);
cudaMemcpy(d_p,p,size,cudaMemcpyHostToDevice);
cudaMemcpy(d_q,q,size,cudaMemcpyHostToDevice);
const int n_thread = 32;
const int n_block = 1024;
//const int n_thread = 512;
//const int n_block = 64;
assert(n_thread*n_block == N);
calcforce_kernel_cuda<<<n_block,n_thread>>>(d_p,d_q);
cudaMemcpy(p,d_p,size,cudaMemcpyDeviceToHost);
cudaMemcpy(q,d_q,size,cudaMemcpyDeviceToHost);
cudaThreadSynchronize();
cudaFree(d_p);
cudaFree(d_q);
}
//----------------------------------------------------------------------
void
calcforce_n2(void){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if (i==j)continue;
double dx = q[j][X] - q[i][X];
double dy = q[j][Y] - q[i][Y];
double dz = q[j][Z] - q[i][Z];
double r2 = (dx*dx + dy*dy + dz*dz);
double r6 = r2*r2*r2;
double df = ((24.0*r6-48.0)/(r6*r6*r2)+C2*8.0)*dt;
p[i][X] += df*dx;
p[i][Y] += df*dy;
p[i][Z] += df*dz;
}
}
}
//----------------------------------------------------------------------
int
main(void){
cudaSetDeviceFlags(cudaDeviceMapHost);
for(int i=0;i<N;i++){
p[i][X] = 1.0;
p[i][Y] = 1.0;
p[i][Z] = 1.0;
q[i][X] = L*myrand();
q[i][Y] = L*myrand();
q[i][Z] = L*myrand();
}
calcforce_cuda();
}
//----------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment