Created
May 14, 2012 18:24
-
-
Save foota/2695505 to your computer and use it in GitHub Desktop.
Simple molecular dynamics using GPU devices
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//////////////////////////////////////////////////////////////// | |
// Simple Molecular Dynamics using GPU by nox, 2009.12.10. | |
// | |
// Perform molecular dynamics for pseudo-atoms without internal | |
// interactions, such as bonds, angles, dihedrals. Only vdW and | |
// Coulomb interactions. | |
#include <iostream> | |
#include <iomanip> | |
#include <fstream> | |
#include <sstream> | |
#include <algorithm> | |
#include <numeric> | |
#include <cmath> | |
#include <cutil_inline.h> | |
using namespace std; | |
const int BLOCK = 256; | |
__device__ float Distance(float* crd, int i, int j) | |
{ | |
float dx = crd[i*3] - crd[j*3]; | |
float dy = crd[i*3+1] - crd[j*3+1]; | |
float dz = crd[i*3+2] - crd[j*3+2]; | |
return sqrtf(dx * dx + dy * dy + dz * dz); | |
} | |
__global__ void Center(float* crd, float cx, float cy, float cz, int num_atoms) | |
{ | |
int i = blockIdx.x * blockDim.x + threadIdx.x; | |
if (i >= num_atoms) return; | |
crd[i*3] -= cx; | |
crd[i*3+1] -= cy; | |
crd[i*3+2] -= cz; | |
} | |
// Calculate energies and forces | |
// Total energy = vdW + Coulomb | |
// vdW | |
// U = eps * [(Rij / r[i])^12 - 2 * (Rij / r[i])^6] | |
// F = -12 * eps / Rij * [(Rij / r[i])^13 - (Rij / r[i])^7] * r_xyz / r[i] | |
// Coulomb | |
// U = SUM_i>j qiqj / r[i] | |
// F = SUM_j qiqj / r[i]^3 * r_xyz | |
__global__ void Calc(float* crd, float* f, float* ene, float* chg, int num_atoms) | |
{ | |
int i = blockIdx.x * blockDim.x + threadIdx.x; | |
if (i >= num_atoms) return; | |
const float eps = 0.2f; | |
const float Rij = 2.5f; | |
float r, rij, r12, r6, r3, r_, q, x; | |
float e0, f0, f1, f2; | |
e0 = f0 = f1 = f2 = 0.0f; | |
float c0 = crd[i*3]; | |
float c1 = crd[i*3+1]; | |
float c2 = crd[i*3+2]; | |
float q0 = chg[i]; | |
for (int j = 0; j < num_atoms; j++) { | |
if (i == j) continue; | |
r = Distance(crd, i, j); | |
r_ = 1.0f / r; | |
q = q0 * chg[j]; | |
rij = Rij * r_; | |
r3 = rij * rij * rij; | |
r6 = r3 * r3; | |
r12 = r6 * r6; | |
x = ((12 * eps) * (r12 - r6) + q * r_) * r_ * r_; | |
e0 += eps * (r12 - 2.0f * r6) + q * r_; | |
f0 += x * (c0 - crd[j*3]); | |
f1 += x * (c1 - crd[j*3+1]); | |
f2 += x * (c2 - crd[j*3+2]); | |
} | |
ene[i] = e0; | |
f[i*3] = f0; | |
f[i*3+1] = f1; | |
f[i*3+2] = f2; | |
} | |
template<unsigned int blockSize> | |
__global__ void Energy(float* ene, float* oene, unsigned int n) | |
{ | |
extern __shared__ float sdata[]; | |
unsigned int tid = threadIdx.x; | |
unsigned int i = blockIdx.x * (blockSize * 2) + tid; | |
unsigned int gridSize = blockSize * 2 * gridDim.x; | |
sdata[tid] = 0; | |
while (i < n) { sdata[tid] += ene[i] + ene[i+blockSize]; i += gridSize; } | |
__syncthreads(); | |
if (blockSize >= 512) { if (tid < 256) { sdata[tid] += sdata[tid+256]; } __syncthreads(); } | |
if (blockSize >= 256) { if (tid < 128) { sdata[tid] += sdata[tid+128]; } __syncthreads(); } | |
if (blockSize >= 128) { if (tid < 64) { sdata[tid] += sdata[tid+ 64]; } __syncthreads(); } | |
if (tid < 32) { | |
if (blockSize >= 64) sdata[tid] += sdata[tid+32]; | |
if (blockSize >= 32) sdata[tid] += sdata[tid+16]; | |
if (blockSize >= 16) sdata[tid] += sdata[tid+ 8]; | |
if (blockSize >= 8) sdata[tid] += sdata[tid+ 4]; | |
if (blockSize >= 4) sdata[tid] += sdata[tid+ 2]; | |
if (blockSize >= 2) sdata[tid] += sdata[tid+ 1]; | |
} | |
if (tid == 0) oene[blockIdx.x] = sdata[0]; | |
} | |
__global__ void Move(float* crd, float* f, int num_atoms, float cap_range) | |
{ | |
int i = blockIdx.x * blockDim.x + threadIdx.x; | |
if (i >= num_atoms) return; | |
crd[i*3] += f[i*3] * 0.01f; | |
crd[i*3+1] += f[i*3+1] * 0.01f; | |
crd[i*3+2] += f[i*3+2] * 0.01f; | |
float r = crd[i*3] * crd[i*3] + crd[i*3+1] * crd[i*3+1] + crd[i*3+2] * crd[i*3+2]; | |
float dr = r - cap_range * cap_range; | |
if (dr > 0.0f) { | |
f[i*3] = -crd[i*3] / cap_range * dr * 0.01f; | |
f[i*3+1] = -crd[i*3+1] / cap_range * dr * 0.01f; | |
f[i*3+2] = -crd[i*3+2] / cap_range * dr * 0.01f; | |
} | |
} | |
class SimpleMD { | |
private: | |
dim3 grid; | |
dim3 threads; | |
int width; | |
int num_steps; | |
int num_atoms; | |
int num_center; | |
float cap_range; | |
float *h_crd, *h_f, *h_ene, *h_chg; | |
float *d_crd, *d_f, *d_ene, *d_chg, *d_oene; | |
float tene; | |
public: | |
SimpleMD(int n, int nc, char*); | |
~SimpleMD(); | |
void ReadCrd(char*); | |
void PrintCrd(); | |
void CenterPosition(); | |
unsigned int NextPow2(unsigned int x); | |
void GetBlocksAndThreads(int n, int& blocks, int& threads); | |
void TotalEnergyReduce(int threads, int blocks); | |
void TotalEnergy(); | |
void Run(); | |
}; | |
SimpleMD::SimpleMD(int n, int nc, char* fname) : num_steps(n), num_center(nc) | |
{ | |
ReadCrd(fname); | |
h_f = new float[num_atoms * 3]; | |
fill(h_f, h_f + num_atoms * 3, 0.0f); | |
h_ene = new float[NextPow2(num_atoms)]; | |
fill(h_ene, h_ene + NextPow2(num_atoms), 0.0f); | |
width = (num_atoms - 1) / BLOCK + 1; | |
grid.x = width; | |
grid.y = 1; | |
grid.z = 1; | |
threads.x = BLOCK; | |
threads.y = 1; | |
threads.z = 1; | |
cudaMalloc((void**)&d_crd, sizeof(float) * num_atoms * 3); | |
cudaMalloc((void**)&d_f, sizeof(float) * num_atoms * 3); | |
cudaMalloc((void**)&d_oene, sizeof(float) * num_atoms); | |
cudaMalloc((void**)&d_ene, sizeof(float) * NextPow2(num_atoms)); | |
cudaMalloc((void**)&d_chg, sizeof(float) * num_atoms); | |
} | |
SimpleMD::~SimpleMD() | |
{ | |
cudaFree(d_chg); | |
cudaFree(d_ene); | |
cudaFree(d_f); | |
cudaFree(d_crd); | |
delete[] h_ene; | |
delete[] h_f; | |
delete[] h_chg; | |
delete[] h_crd; | |
} | |
void SimpleMD::ReadCrd(char* fname) | |
{ | |
fstream fs(fname, ios_base::in); | |
string line; | |
stringstream ss; | |
if (!fs.is_open()) { | |
cerr << "File open error: " << fname << endl; | |
exit(1); | |
} | |
getline(fs, line); cout << line << endl; | |
getline(fs, line); ss.str(line); ss >> num_atoms; ss.clear(); | |
getline(fs, line); ss.str(line); ss >> cap_range; ss.clear(); | |
h_crd = new float[num_atoms * 3]; | |
h_chg = new float[num_atoms]; | |
for (int i = 0; i < num_atoms; i++) { | |
getline(fs, line); | |
ss.str(line); | |
ss >> h_crd[i*3] >> h_crd[i*3+1] >> h_crd[i*3+2] >> h_chg[i]; | |
ss.clear(); | |
ss.str(""); | |
} | |
fs.close(); | |
} | |
void SimpleMD::PrintCrd() | |
{ | |
cout << endl; | |
cout << num_atoms << endl; | |
cout << cap_range << endl; | |
for (int i = 0; i < num_atoms; i++) { | |
for (int j = 0; j < 3; j++) cout << " " << fixed << setw(10) << setprecision(6) << h_crd[i*3+j]; | |
cout << " " << fixed << setw(8) << setprecision(4) << h_chg[i]; | |
cout << endl; | |
} | |
} | |
void SimpleMD::CenterPosition() | |
{ | |
float d[3]; | |
cudaMemcpy(h_crd, d_crd, sizeof(float) * num_atoms * 3, cudaMemcpyDeviceToHost); | |
for (int i = 0; i < num_atoms; i++) | |
for (int j = 0; j < 3; j++) d[j] += h_crd[i*3+j]; | |
for (int i = 0; i < 3; i++) d[i] /= num_atoms; | |
cudaMemcpy(d_crd, h_crd, sizeof(float) * num_atoms * 3, cudaMemcpyHostToDevice); | |
Center<<<grid, threads>>>(d_crd, d[0], d[1], d[2], num_atoms); | |
cudaThreadSynchronize(); | |
} | |
unsigned int SimpleMD::NextPow2(unsigned int x) | |
{ | |
--x; | |
x |= x >> 1; | |
x |= x >> 2; | |
x |= x >> 4; | |
x |= x >> 8; | |
x |= x >> 16; | |
return ++x; | |
} | |
void SimpleMD::GetBlocksAndThreads(int n, int& blocks, int& threads) | |
{ | |
threads = (n < BLOCK * 2) ? NextPow2((n + 1) / 2) : BLOCK; | |
blocks = (n + (threads * 2 - 1)) / (threads * 2); | |
blocks = min(width, blocks); | |
} | |
void SimpleMD::TotalEnergyReduce(int threads, int blocks) | |
{ | |
switch (threads) { | |
case 512: | |
Energy<512><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 256: | |
Energy<256><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 128: | |
Energy<128><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 64: | |
Energy< 64><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 32: | |
Energy< 32><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 16: | |
Energy< 16><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 8: | |
Energy< 8><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 4: | |
Energy< 4><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 2: | |
Energy< 2><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
case 1: | |
Energy< 1><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break; | |
} | |
} | |
void SimpleMD::TotalEnergy() | |
{ | |
if (num_atoms > 1024) { | |
int th, bl; | |
GetBlocksAndThreads(num_atoms, bl, th); | |
int s = bl; | |
while (s > 1) { | |
GetBlocksAndThreads(s, bl, th); | |
TotalEnergyReduce(th, bl); | |
s = (s + (th * 2 - 1)) / (th * 2); | |
} | |
cudaThreadSynchronize(); | |
cudaMemcpy(&tene, d_oene, sizeof(float), cudaMemcpyDeviceToHost); | |
tene /= 2.0f; | |
} else { | |
cudaMemcpy(h_ene, d_ene, sizeof(float) * num_atoms, cudaMemcpyDeviceToHost); | |
tene = accumulate(h_ene, h_ene + num_atoms, 0.0f) / 2.0f; | |
} | |
} | |
void SimpleMD::Run() | |
{ | |
cudaMemcpy(d_crd, h_crd, sizeof(float) * num_atoms * 3, cudaMemcpyHostToDevice); | |
cudaMemcpy(d_f, h_f, sizeof(float) * num_atoms * 3, cudaMemcpyHostToDevice); | |
cudaMemcpy(d_ene, h_ene, sizeof(float) * NextPow2(num_atoms), cudaMemcpyHostToDevice); | |
cudaMemcpy(d_chg, h_chg, sizeof(float) * num_atoms, cudaMemcpyHostToDevice); | |
for (int i = 0; i < num_steps; i++) { | |
if (i % num_center == 0) CenterPosition(); | |
Calc<<<grid, threads>>>(d_crd, d_f, d_ene, d_chg, num_atoms); | |
cudaThreadSynchronize(); | |
TotalEnergy(); | |
Move<<<grid, threads>>>(d_crd, d_f, num_atoms, cap_range); | |
cudaThreadSynchronize(); | |
cout << "Energy (" << setw(7) << i + 1 << "): "; | |
cout << fixed << setw(15) << setprecision(5) << tene << endl; | |
} | |
cudaMemcpy(h_crd, d_crd, sizeof(float) * num_atoms * 3, cudaMemcpyDeviceToHost); | |
cudaMemcpy(h_f, d_f, sizeof(float) * num_atoms * 3, cudaMemcpyDeviceToHost); | |
cudaMemcpy(h_ene, d_ene, sizeof(float) * num_atoms, cudaMemcpyDeviceToHost); | |
} | |
int main(int argc, char** argv) | |
{ | |
if (argc != 3) { | |
cerr << "Usage: " << argv[0] << " input_file number_of_steps" << endl; | |
cerr << "Input: line 1 : title" << endl; | |
cerr << " line 2 : number of atoms" << endl; | |
cerr << " line 3 : radius of droplet" << endl; | |
cerr << " line 4-: x-crd y-crd z-crd charge" << endl; | |
exit(1); | |
} | |
stringstream ss; | |
int nstep; | |
cutilDeviceInit(1, argv); | |
ss.str(argv[2]); ss >> nstep; | |
int ncenter = 100; | |
SimpleMD md(nstep, ncenter, argv[1]); | |
md.PrintCrd(); | |
md.Run(); | |
md.PrintCrd(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
http://handasse.blogspot.com/2009/12/cuda.html
http://handasse.blogspot.com/2009/03/cuda.html