Created
July 1, 2019 12:52
-
-
Save vnkdj5/90d53bdf5d29905640ef605bbb8e464e to your computer and use it in GitHub Desktop.
Vector additon cuda program
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<iostream> | |
#include<cstdlib> | |
using namespace std; | |
//VectorAdd parallel function | |
__global__ void vectorAdd(int *a, int *b, int *result, int n) | |
{ | |
int tid=threadIdx.x+blockIdx.x*blockDim.x; | |
if(tid<n) | |
{ | |
result[tid]=a[tid]+b[tid]; | |
} | |
} | |
int main() | |
{ | |
int *a,*b,*c; | |
int *a_dev,*b_dev,*c_dev; | |
int n=1<<24; | |
a=new int[n]; | |
b=new int[n]; | |
c=new int[n]; | |
int *d=new int[n]; | |
int size=n*sizeof(int); | |
cudaMalloc(&a_dev,size); | |
cudaMalloc(&b_dev,size); | |
cudaMalloc(&c_dev,size); | |
//Array initialization..You can use Randon function to assign values | |
for(int i=0;i<n;i++) | |
{ | |
a[i]=1; | |
b[i]=2; | |
d[i]=a[i]+b[i]; //calculating serial addition | |
} | |
cudaEvent_t start,end; | |
cudaEventCreate(&start); | |
cudaEventCreate(&end); | |
cudaMemcpy(a_dev,a,size,cudaMemcpyHostToDevice); | |
cudaMemcpy(b_dev,b,size,cudaMemcpyHostToDevice); | |
int threads=1024; | |
int blocks=(n+threads-1)/threads; | |
cudaEventRecord(start); | |
//Parallel addition program | |
vectorAdd<<<blocks,threads>>>(a_dev,b_dev,c_dev,n); | |
cudaEventRecord(end); | |
cudaEventSynchronize(end); | |
float time=0.0; | |
cudaEventElapsedTime(&time,start,end); | |
cudaMemcpy(c,c_dev,size,cudaMemcpyDeviceToHost); | |
//Calculate the error term. | |
int error=0; | |
for(int i=0;i<n;i++){ | |
error+=d[i]-c[i]; | |
//cout<<" gpu "<<c[i]<<" CPU "<<d[i]; | |
} | |
cout<<"Error : "<<error; | |
cout<<"\nTime Elapsed: "<<time; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment