Created
June 29, 2022 01:57
-
-
Save vhxs/5bc0f4a00050ff277c43248a382e680f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h>
#include <stdio.h>
#include <cuda.h>

// Number of elements per vector. Also the thread count of the single-block
// kernel launch in main(), so it must stay <= 1024 (the per-block thread
// limit on all current compute capabilities).
#define N 512

// compile with: nvcc add_vectors.cu

// forward declaration of CUDA kernel
__global__ void add_vectors(int* A, int* B, int* C);
// Check a CUDA runtime call and abort with file/line diagnostics on failure.
// Kernel launches themselves return nothing; their configuration errors are
// picked up via cudaGetLastError() immediately after the launch.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Host driver: fill A[i] = i and B[i] = 2*i on the CPU, add them on the GPU
// with one block of N threads (one thread per element), copy the sums back,
// print them, and release all host and device memory.
int main(void) {
    size_t bytes = N * sizeof(int);

    // allocate space on CPU
    int* A = (int*) malloc(bytes);
    int* B = (int*) malloc(bytes);
    int* C = (int*) malloc(bytes);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }

    // allocate space on GPU
    int* A_d;
    int* B_d;
    int* C_d;
    CUDA_CHECK(cudaMalloc(&A_d, bytes));
    CUDA_CHECK(cudaMalloc(&B_d, bytes));
    CUDA_CHECK(cudaMalloc(&C_d, bytes));

    // populate data on CPU
    for (int i = 0; i < N; i++) {
        A[i] = i;
        B[i] = 2 * i;
        C[i] = 0;
    }

    // copy CPU to GPU. (C_d does not strictly need an upload — the kernel
    // overwrites every element — but the transfer is harmless and kept.)
    CUDA_CHECK(cudaMemcpy(A_d, A, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(B_d, B, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(C_d, C, bytes, cudaMemcpyHostToDevice));

    // invoke kernel: one block, N threads, one thread per element.
    // Valid because N = 512 <= 1024 (max threads per block).
    add_vectors<<<1, N>>>(A_d, B_d, C_d);
    CUDA_CHECK(cudaGetLastError());  // catch launch-configuration errors

    // copy result from GPU to CPU; this blocking cudaMemcpy also waits for
    // the kernel to finish, so no explicit cudaDeviceSynchronize is needed
    CUDA_CHECK(cudaMemcpy(C, C_d, bytes, cudaMemcpyDeviceToHost));

    // print result
    for (int i = 0; i < N; i++) {
        printf("%d\n", C[i]);
    }

    // release device and host memory (the original leaked all six buffers)
    CUDA_CHECK(cudaFree(A_d));
    CUDA_CHECK(cudaFree(B_d));
    CUDA_CHECK(cudaFree(C_d));
    free(A);
    free(B);
    free(C);
    return 0;
}
// Element-wise vector addition: C[i] = A[i] + B[i].
// Indexing uses threadIdx.x only (blockIdx is ignored), so this kernel
// expects a one-block launch with exactly one thread per element, as done
// by the <<<1, N>>> launch in main().
__global__ void add_vectors(int* A, int* B, int* C) {
    int i = threadIdx.x;
    C[i] = A[i] + B[i];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment