Created
April 9, 2019 22:50
-
-
Save stanbar/f8176b2db7e05568a7d3cbdb7ca4eda9 to your computer and use it in GitHub Desktop.
Lab3 CUDA
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Copyright 2017, Paweł Czarnul [email protected] | |
Redistribution and use in source and binary forms, with or without | |
modification, are permitted provided that the following conditions are | |
met: | |
1. Redistributions of source code must retain the above copyright | |
notice, this list of conditions and the following disclaimer. | |
2. Redistributions in binary form must reproduce the above copyright | |
notice, this list of conditions and the following disclaimer in the | |
documentation and/or other materials provided with the distribution. | |
3. Neither the name of the copyright holder nor the names of its | |
contributors may be used to endorse or promote products derived from | |
this software without specific prior written permission. | |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <cuda_runtime.h> | |
#include <sys/time.h> | |
#include <curand.h> | |
#include <curand_kernel.h> | |
#define PRECISION 0.000001 | |
#define SHOTS_PER_RANGE 10000 | |
/*
 * Kernel: seeds one cuRAND generator state per launched thread.
 *
 * Each thread computes its flat global index from a 1D grid of 1D blocks and
 * initialises state[index] with the fixed seed 1234, using the index as the
 * subsequence number and an offset of 0.
 *
 * state must point to device memory holding at least gridDim.x * blockDim.x
 * curandState elements; the kernel performs no bounds check.
 */
__global__
void initRNG(curandState *state){
    const int global_id = blockIdx.x * blockDim.x + threadIdx.x;
    curandState *const slot = state + global_id;
    curand_init(1234, global_id, 0, slot);
}
/*
 * Writes a newline followed by the message s to standard output and then
 * terminates the process with EXIT_FAILURE. Never returns.
 */
__host__
void errorexit(const char *s) {
    fprintf(stdout, "\n%s", s);
    exit(EXIT_FAILURE);
}
/*
 * Draws one double-precision sample and rescales it from the unit interval
 * to the range spanned by min and max, storing it in *result.
 *
 * curand_uniform_double yields values in (0, 1], so for min < max the stored
 * value lies in (min, max].
 *
 * NOTE: d_state is received by value. The generator advance performed by
 * curand_uniform_double therefore happens on this function's local copy and
 * is not visible to the caller; passing the same stored state again produces
 * the same sample.
 */
__device__
void float_rand(curandState d_state, double min, double max, double* result){
    const double span = max - min;
    const double unit_sample = curand_uniform_double(&d_state);
    *result = min + unit_sample * span;
}
/*
 * Integrand: stores sin(x)^2 in *result.
 *
 * The sine is evaluated in double precision. The single-precision sinf would
 * first round x to float, and for arguments in the thousands a float ulp is
 * about 1e-3, which is large enough to change the value of sin(x) noticeably.
 * The sine is computed once and squared.
 *
 * Callable from both host and device so that the same integrand can be used
 * for a CPU reference computation.
 *
 * x      - point at which to evaluate the integrand (radians)
 * result - output location; must not be NULL
 */
__host__ __device__
void f (double x, double* result){
    const double s = sin(x);
    *result = s * s;
}
/*
 * Converts a hit count into an area estimate.
 *
 * Stores (hit / shots) * (b - a) in *surface, i.e. the fraction of shots that
 * were hits, scaled by the width of the interval [a, b].
 *
 * shots is used as a divisor and is not checked against zero.
 */
__device__
void monte_carlo(double hit, double shots, double a, double b, double* surface){
    const double interval_width = b - a;
    const double hit_fraction = hit / shots;
    *surface = hit_fraction * interval_width;
}
/*
 * Draws one sample uniformly from the range spanned by lo and hi and advances
 * the generator behind `state`. Takes a pointer because float_rand receives
 * the state by value and so cannot advance a caller's generator.
 */
static __device__ __forceinline__
double uniform_in_range(curandState *state, double lo, double hi) {
    return lo + curand_uniform_double(state) * (hi - lo);
}

/*
 * Fires `shots` random points into the rectangle [a, b] x [0, 1] and returns
 * how many of them fall on or below the curve y = f(x).
 */
static __device__
int count_hits(curandState *state, double a, double b, int shots) {
    int hits = 0;
    for (int i = 0; i < shots; i++) {
        const double x = uniform_in_range(state, a, b);
        double curve_y = 0.0;
        f(x, &curve_y);
        const double y = uniform_in_range(state, 0.0, 1.0);
        // A hit is a point under the curve; that fraction times the
        // rectangle area is the integral estimate.
        if (y <= curve_y) {
            hits++;
        }
    }
    return hits;
}

/*
 * Kernel: hit-or-miss Monte Carlo estimate of the integral of f over
 * [k, k + 1], where k is the thread's flat global index. The estimate is
 * written to result[k].
 *
 * Launch layout: 1D grid of 1D blocks.
 * Preconditions (not checked, the kernel has no element count):
 *   - rngStates and result each hold at least gridDim.x * blockDim.x elements;
 *   - rngStates[k] has been initialised (e.g. by initRNG launched with the
 *     same grid/block configuration).
 *
 * Each thread works on a private copy of its own generator state, so samples
 * are independent between threads and between the x and y draws, and no two
 * threads touch the same state. The advanced state is stored back on exit.
 *
 * The estimate starts from SHOTS_PER_RANGE shots and is refined in batches of
 * SHOTS_PER_RANGE / 10 shots, accumulating hits and shots, until two
 * consecutive estimates differ by at most 0.01.
 */
__global__
void simpleIntegration(curandState *const rngStates, double *result) {
    // Widen before multiplying so large grids cannot overflow 32 bits.
    const long my_index = (long) blockIdx.x * blockDim.x + threadIdx.x;
    const double a = (double) my_index;
    const double b = a + 1.0;

    curandState rng = rngStates[my_index];

    int shots = SHOTS_PER_RANGE;
    int hits = count_hits(&rng, a, b, shots);
    double surface_final = 0.0;
    monte_carlo(hits, shots, a, b, &surface_final);

    const int additional_shots = (SHOTS_PER_RANGE / 10 > 0) ? SHOTS_PER_RANGE / 10 : 1;
    double difference = 0.0;
    do {
        hits += count_hits(&rng, a, b, additional_shots);
        shots += additional_shots;

        double refined = 0.0;
        monte_carlo(hits, shots, a, b, &refined);

        // One batch can move the estimate by at most
        // (additional_shots / shots) * (b - a), which shrinks as shots grow,
        // so this loop always terminates.
        difference = fabs(refined - surface_final);
        surface_final = refined;
    } while (difference > 0.01);

    rngStates[my_index] = rng;
    result[my_index] = surface_final;
}
/*
 * Prints the time elapsed between *start and *stop, in microseconds, to
 * standard output (preceded and followed by a newline).
 */
__host__
void printtime(struct timeval *start,struct timeval *stop) {
    long elapsed_us = 1000000*(stop->tv_sec - start->tv_sec);
    elapsed_us += stop->tv_usec - start->tv_usec;
    printf("\n%ld microseconds\n", elapsed_us);
}
/*
 * Terminates the program through errorexit(what) when a CUDA runtime call did
 * not return cudaSuccess, printing the runtime's description of the error
 * first.
 */
static void check_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        printf("\nCUDA error: %s", cudaGetErrorString(status));
        errorexit(what);
    }
}

/*
 * Host driver: allocates buffers, seeds the generators, runs the integration
 * kernel with one thread per unit interval, sums the per-interval estimates
 * on the host and prints the total together with two timings (GPU section
 * only, and whole run).
 *
 * Command-line arguments are not used.
 */
int main(int argc,char **argv) {
    (void) argc;
    (void) argv;

    struct timeval start, stop, start1, stop1;
    const int threadsinblock = 100;
    const int blocksingrid = 100;
    const long threadcount = (long) threadsinblock * blocksingrid;

    gettimeofday(&start, NULL);

    const size_t size = (size_t) threadcount * sizeof(double);
    double *hresults = (double *) malloc(size);
    if (!hresults) errorexit("Error allocating memory on the host");

    double *dresults = NULL;
    check_cuda(cudaMalloc((void **) &dresults, size),
               "Error allocating memory on the GPU");

    // Allocate memory for RNG states. initRNG initialises one state per
    // launched thread, i.e. the first threadcount entries of this array.
    curandState *d_rngStates = NULL;
    check_cuda(cudaMalloc((void **) &d_rngStates,
                          (size_t) SHOTS_PER_RANGE * 100 * sizeof(curandState)),
               "Error allocating memory for curandState");

    gettimeofday(&start1, NULL);

    initRNG<<<blocksingrid, threadsinblock>>>(d_rngStates);
    check_cuda(cudaGetLastError(), "Error during RNG initialisation kernel launch");

    // start computations on the GPU
    simpleIntegration<<<blocksingrid, threadsinblock>>>(d_rngStates, dresults);
    check_cuda(cudaGetLastError(), "Error during kernel launch");

    // Wait for both kernels so that a fault during execution is reported as
    // such rather than as a failed copy.
    check_cuda(cudaDeviceSynchronize(), "Error during kernel execution");

    check_cuda(cudaMemcpy(hresults, dresults, size, cudaMemcpyDeviceToHost),
               "Error copying results");

    gettimeofday(&stop1, NULL);

    double total_surface = 0.0;
    for (long i = 0; i < threadcount; i++)
        total_surface += hresults[i];

    gettimeofday(&stop, NULL);

    printf("\nThe final result is %f\n", total_surface);
    printtime(&start1, &stop1);
    printtime(&start, &stop);

    // release resources
    free(hresults);
    check_cuda(cudaFree(d_rngStates),
               "Error when deallocating RNG states on the GPU");
    check_cuda(cudaFree(dresults),
               "Error when deallocating space on the GPU");
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment