Bhavya031 · September 11, 2023 04:36
diff --git a/matix.c b/matix.c
 #include <stdio.h>
 #include <cuda.h>

 #define N 3 // Matrices are N x N; the prompts below say "3x3" and the kernel is launched as one N x N block

 /*
  * Computes c = a * b for N x N int matrices stored row-major in flat arrays.
  * Each thread produces exactly one element of c.
  *
  * Launch layout: 2D grid/block; the x dimension indexes columns and the
  * y dimension indexes rows. The launch must cover at least N x N threads;
  * threads that fall outside the matrix exit without touching memory.
  *
  * a, b: input matrices, N * N ints each.
  * c:    output matrix, N * N ints.
  */
 __global__ void matrixMultiply(int *a, int *b, int *c)
 {
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    // A launch larger than N x N would otherwise read and write out of bounds.
    if (row >= N || col >= N) {
        return;
    }

    // Dot product of row `row` of a with column `col` of b.
    int sum = 0;
    for (int i = 0; i < N; i++) {
        sum += a[row * N + i] * b[i * N + col];
    }

    c[row * N + col] = sum;
 }

 /*
  * Reads two N x N integer matrices from stdin, multiplies them on the GPU
  * with matrixMultiply, and prints the product to stdout.
  *
  * Returns 0 on success, 1 if the input is malformed or any CUDA call fails.
  */
 int main()
 {
    int a[N][N], b[N][N], c[N][N];
    // NULL-initialised so the cudaFree calls at the end are safe on every path.
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    const size_t bytes = N * N * sizeof(int);

    // Initialize matrices a and b
    printf("Enter values for matrix A (3x3):\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (scanf("%d", &a[i][j]) != 1) {
                fprintf(stderr, "Invalid or missing input for matrix A\n");
                return 1;
            }
        }
    }

    printf("Enter values for matrix B (3x3):\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (scanf("%d", &b[i][j]) != 1) {
                fprintf(stderr, "Invalid or missing input for matrix B\n");
                return 1;
            }
        }
    }

    // Allocate memory on the GPU; each step runs only if all earlier ones succeeded.
    cudaError_t err = cudaMalloc((void **)&d_a, bytes);
    if (err == cudaSuccess) {
        err = cudaMalloc((void **)&d_b, bytes);
    }
    if (err == cudaSuccess) {
        err = cudaMalloc((void **)&d_c, bytes);
    }

    // Copy matrices a and b from host to device
    if (err == cudaSuccess) {
        err = cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice);
    }

    if (err == cudaSuccess) {
        // One block of N x N threads: one thread per output element.
        dim3 dimGrid(1, 1);
        dim3 dimBlock(N, N);

        // Launch the matrix multiplication kernel
        matrixMultiply<<<dimGrid, dimBlock>>>(d_a, d_b, d_c);

        // A launch does not return a status; configuration errors are read back here.
        err = cudaGetLastError();
    }

    // Copy the result matrix c from device to host. This blocking copy waits
    // for the kernel, so faults raised during execution surface in its status.
    if (err == cudaSuccess) {
        err = cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost);
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
    } else {
        // Print the result matrix c
        printf("Result matrix C (3x3):\n");
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < N; j++) {
                printf("%d\t", c[i][j]);
            }
            printf("\n");
        }
    }

    // Free device memory (cudaFree accepts NULL, so partial allocation is fine)
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    return err == cudaSuccess ? 0 : 1;
 }
	#include <stdio.h>
	#include <cuda.h>

	#define N 3 // Matrices are N x N; the prompts below say "3x3" and the kernel is launched as one N x N block

	/*
	 * Computes c = a * b for N x N int matrices stored row-major in flat arrays.
	 * Each thread produces exactly one element of c.
	 *
	 * Launch layout: 2D grid/block; the x dimension indexes columns and the
	 * y dimension indexes rows. The launch must cover at least N x N threads;
	 * threads that fall outside the matrix exit without touching memory.
	 *
	 * a, b: input matrices, N * N ints each.
	 * c:    output matrix, N * N ints.
	 */
	__global__ void matrixMultiply(int *a, int *b, int *c)
	{
	    int row = blockIdx.y * blockDim.y + threadIdx.y;
	    int col = blockIdx.x * blockDim.x + threadIdx.x;

	    // A launch larger than N x N would otherwise read and write out of bounds.
	    if (row >= N || col >= N) {
	        return;
	    }

	    // Dot product of row `row` of a with column `col` of b.
	    int sum = 0;
	    for (int i = 0; i < N; i++) {
	        sum += a[row * N + i] * b[i * N + col];
	    }

	    c[row * N + col] = sum;
	}

	/*
	 * Reads two N x N integer matrices from stdin, multiplies them on the GPU
	 * with matrixMultiply, and prints the product to stdout.
	 *
	 * Returns 0 on success, 1 if the input is malformed or any CUDA call fails.
	 */
	int main()
	{
	    int a[N][N], b[N][N], c[N][N];
	    // All three are device pointers; NULL-initialised so the cudaFree calls
	    // at the end are safe on every path.
	    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
	    const size_t bytes = N * N * sizeof(int);

	    // Initialize matrices a and b
	    printf("Enter values for matrix A (3x3):\n");
	    for (int i = 0; i < N; i++) {
	        for (int j = 0; j < N; j++) {
	            if (scanf("%d", &a[i][j]) != 1) {
	                fprintf(stderr, "Invalid or missing input for matrix A\n");
	                return 1;
	            }
	        }
	    }

	    printf("Enter values for matrix B (3x3):\n");
	    for (int i = 0; i < N; i++) {
	        for (int j = 0; j < N; j++) {
	            if (scanf("%d", &b[i][j]) != 1) {
	                fprintf(stderr, "Invalid or missing input for matrix B\n");
	                return 1;
	            }
	        }
	    }

	    // Allocate memory on the GPU; cudaMalloc takes the address of the
	    // pointer (void **). Each step runs only if all earlier ones succeeded.
	    cudaError_t err = cudaMalloc((void **)&d_a, bytes);
	    if (err == cudaSuccess) {
	        err = cudaMalloc((void **)&d_b, bytes);
	    }
	    if (err == cudaSuccess) {
	        err = cudaMalloc((void **)&d_c, bytes);
	    }

	    // Copy matrices a and b from host to device
	    if (err == cudaSuccess) {
	        err = cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice);
	    }
	    if (err == cudaSuccess) {
	        err = cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice);
	    }

	    if (err == cudaSuccess) {
	        // One block of N x N threads: one thread per output element.
	        dim3 dimGrid(1, 1);
	        dim3 dimBlock(N, N);

	        // Launch the matrix multiplication kernel
	        matrixMultiply<<<dimGrid, dimBlock>>>(d_a, d_b, d_c);

	        // A launch does not return a status; configuration errors are read back here.
	        err = cudaGetLastError();
	    }

	    // Copy the result matrix c from device to host. This blocking copy waits
	    // for the kernel, so faults raised during execution surface in its status.
	    if (err == cudaSuccess) {
	        err = cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost);
	    }

	    if (err != cudaSuccess) {
	        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
	    } else {
	        // Print the result matrix c
	        printf("Result matrix C (3x3):\n");
	        for (int i = 0; i < N; i++) {
	            for (int j = 0; j < N; j++) {
	                printf("%d\t", c[i][j]);
	            }
	            printf("\n");
	        }
	    }

	    // Free device memory (cudaFree accepts NULL, so partial allocation is fine)
	    cudaFree(d_a);
	    cudaFree(d_b);
	    cudaFree(d_c);

	    return err == cudaSuccess ? 0 : 1;
	}
No results found