Created
September 11, 2023 04:36
-
-
Save Bhavya031/ea2abc4edc4903f3674720494990e77e to your computer and use it in GitHub Desktop.
CUDA matrix multiplication
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <cuda.h> | |
#define N 3 // Matrix size (3x3) | |
// Computes c = a * b for N x N integer matrices stored as flat, row-major
// arrays of N * N elements.
//
// Launch layout: a 2D grid/block configuration in which x indexes columns and
// y indexes rows. Each thread produces one output element; the launch must
// cover at least N threads in each dimension. Threads that fall outside the
// N x N output do nothing, so oversized launches are safe.
//
// a, b: input matrices (read only by this kernel).
// c:    output matrix; element (row, col) is written at c[row * N + col].
__global__ void matrixMultiply(int *a, int *b, int *c)
{
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard against launches with more threads than matrix elements;
    // without it, surplus threads would index past the N * N buffers.
    if (row >= N || col >= N) {
        return;
    }

    // Dot product of row `row` of a with column `col` of b.
    int sum = 0;
    for (int i = 0; i < N; i++) {
        sum += a[row * N + i] * b[i * N + col];
    }
    c[row * N + col] = sum;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Reads N * N integers from stdin into `m`, row by row.
// Returns false as soon as a value cannot be parsed.
static bool readMatrix(int m[N][N])
{
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (scanf("%d", &m[i][j]) != 1) {
                return false;
            }
        }
    }
    return true;
}

// Reads matrices A and B from stdin, multiplies them on the GPU with
// matrixMultiply, and prints the product.
// Returns 0 on success and 1 on invalid input or any CUDA failure.
int main()
{
    int a[N][N], b[N][N], c[N][N];
    // Initialised to NULL so the cleanup below is valid on every path.
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    const size_t bytes = N * N * sizeof(int);

    // Initialize matrices a and b
    printf("Enter values for matrix A (3x3):\n");
    if (!readMatrix(a)) {
        fprintf(stderr, "Invalid input for matrix A\n");
        return 1;
    }

    printf("Enter values for matrix B (3x3):\n");
    if (!readMatrix(b)) {
        fprintf(stderr, "Invalid input for matrix B\n");
        return 1;
    }

    // Allocate device memory and copy the inputs over. The && chain stops at
    // the first failure so later steps never run on a bad state.
    bool ok = cudaOk(cudaMalloc((void **)&d_a, bytes), "cudaMalloc(d_a)")
           && cudaOk(cudaMalloc((void **)&d_b, bytes), "cudaMalloc(d_b)")
           && cudaOk(cudaMalloc((void **)&d_c, bytes), "cudaMalloc(d_c)")
           && cudaOk(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice), "copy of A to device")
           && cudaOk(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice), "copy of B to device");

    if (ok) {
        // One block of N x N threads: one thread per output element.
        dim3 dimGrid(1, 1);
        dim3 dimBlock(N, N);

        matrixMultiply<<<dimGrid, dimBlock>>>(d_a, d_b, d_c);

        // A kernel launch returns no status itself; cudaGetLastError reports
        // launch-configuration errors, and the following cudaMemcpy reports
        // its own status when copying the result back.
        ok = cudaOk(cudaGetLastError(), "matrixMultiply launch")
          && cudaOk(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost), "copy of C to host");
    }

    if (ok) {
        // Print the result matrix c
        printf("Result matrix C (3x3):\n");
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < N; j++) {
                printf("%d\t", c[i][j]);
            }
            printf("\n");
        }
    }

    // Free device memory. cudaFree on a NULL pointer performs no operation,
    // so this is safe even when an allocation above failed.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    return ok ? 0 : 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment