Created
November 16, 2020 08:47
-
-
Save kingardor/9cbf829a9743ef4afe5bdf0b6564e53d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <math.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include "timer.h" | |
#include "check.h" | |
#define SOFTENING 1e-9f | |
/* | |
* Each body contains x, y, and z coordinate positions, | |
* as well as velocities in the x, y, and z directions. | |
*/ | |
typedef struct { float x, y, z, vx, vy, vz; } Body; | |
/* | |
* Do not modify this function. A constraint of this exercise is | |
* that it remain a host function. | |
*/ | |
void randomizeBodies(float *data, int n) { | |
for (int i = 0; i < n; i++) { | |
data[i] = 2.0f * (rand() / (float)RAND_MAX) - 1.0f; | |
} | |
} | |
/* | |
* This function calculates the gravitational impact of all bodies in the system | |
* on all others, but does not update their positions. | |
*/ | |
__global__ void bodyForce(Body *p, float dt, int N) | |
{ | |
int tid = blockIdx.x * blockDim.x + threadIdx.x; | |
if (tid < N) { | |
float Fx = 0, Fy = 0, Fz = 0; | |
for (int i = 0; i < N; i++) { | |
float dx = p[i].x - p[tid].x; | |
float dy = p[i].y - p[tid].y; | |
float dz = p[i].z - p[tid].z; | |
float distSqr = dx*dx + dy*dy + dz*dz + SOFTENING; | |
float invDist = rsqrtf(distSqr); | |
float invDist3 = invDist * invDist * invDist; | |
Fx += dx * invDist3; | |
Fy += dy * invDist3; | |
Fz += dz * invDist3; | |
} | |
p[tid].vx += dt*Fx; | |
p[tid].vy += dt*Fy; | |
p[tid].vz += dt*Fz; | |
} | |
} | |
int main(const int argc, const char** argv) { | |
/* | |
* Do not change the value for `nBodies` here. If you would like to modify it, | |
* pass values into the command line. | |
*/ | |
int nBodies = 2<<11; | |
int salt = 0; | |
if (argc > 1) nBodies = 2<<atoi(argv[1]); | |
/* | |
* This salt is for assessment reasons. Tampering with it will result in automatic failure. | |
*/ | |
if (argc > 2) salt = atoi(argv[2]); | |
const float dt = 0.01f; // time step | |
const int nIters = 10; // simulation iterations | |
int bytes = nBodies * sizeof(Body); | |
float *buf; | |
cudaMallocManaged(&buf, bytes); | |
Body *p = (Body*)buf; | |
/* | |
* As a constraint of this exercise, `randomizeBodies` must remain a host function. | |
*/ | |
randomizeBodies(buf, 6 * nBodies); // Init pos / vel data | |
double totalTime = 0.0; | |
/* | |
* This simulation will run for 10 cycles of time, calculating gravitational | |
* interaction amongst bodies, and adjusting their positions to reflect. | |
*/ | |
/*******************************************************************/ | |
// Do not modify these 2 lines of code. | |
for (int iter = 0; iter < nIters; iter++) { | |
StartTimer(); | |
/*******************************************************************/ | |
/* | |
* You will likely wish to refactor the work being done in `bodyForce`, | |
* as well as the work to integrate the positions. | |
*/ | |
int threads_per_block = 128; | |
int number_of_blocks = (nBodies / threads_per_block); | |
bodyForce <<< number_of_blocks, threads_per_block >>> ( p, dt, nBodies ); | |
// Wait for GPU to finish before accessing on host | |
cudaDeviceSynchronize(); | |
// Integrate position | |
for (int i = 0 ; i < nBodies; i++) { | |
p[i].x += p[i].vx*dt; | |
p[i].y += p[i].vy*dt; | |
p[i].z += p[i].vz*dt; | |
} | |
/* | |
* This position integration cannot occur until this round of `bodyForce` has completed. | |
* Also, the next round of `bodyForce` cannot begin until the integration is complete. | |
*/ | |
/*******************************************************************/ | |
// Do not modify the code in this section. | |
const double tElapsed = GetTimer() / 1000.0; | |
totalTime += tElapsed; | |
} | |
double avgTime = totalTime / (double)(nIters); | |
float billionsOfOpsPerSecond = 1e-9 * nBodies * nBodies / avgTime; | |
#ifdef ASSESS | |
checkPerformance(buf, billionsOfOpsPerSecond, salt); | |
#else | |
checkAccuracy(buf, nBodies); | |
printf("%d Bodies: average %0.3f Billion Interactions / second\n", nBodies, billionsOfOpsPerSecond); | |
salt += 1; | |
#endif | |
/*******************************************************************/ | |
/* | |
* Feel free to modify code below. | |
*/ | |
cudaFree(buf); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment