Skip to content

Instantly share code, notes, and snippets.

View AxoyTO's full-sized avatar
👨‍💻

Tevfik Aksoy AxoyTO

👨‍💻
View GitHub Profile
@AxoyTO
AxoyTO / addVectors.cu
Last active September 26, 2021 17:09
CUDA AddVectors (TASK_01)
#include <cuda.h>
#include <chrono>
#include <cstdlib>
#include <iostream>
__global__ void addVectorsKernel(const double* A, const double* B, double* C, double n) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < n) {
C[i] = A[i] + B[i];
}
@AxoyTO
AxoyTO / transposeMatrix.cu
Last active September 29, 2021 09:55
CUDA TransposeMatrix (TASK_02)
#include <cuda.h>
#include <chrono>
#include <cstdlib>
#include <iostream>
__global__ void transposeKernel(const double* A, double* AT, int N) {
int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
int index = xIndex + N * yIndex;
@AxoyTO
AxoyTO / divergencyThreads.cu
Last active September 29, 2021 15:50
CUDA Check threadDivergency (TASK_03)
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cstdlib>
#include <iostream>
__global__ void DivergencyKernel(float* a, int N) {
int x = blockDim.x * blockIdx.x + threadIdx.x;
if (x % 2 == 0) {
@AxoyTO
AxoyTO / reverseArray.cu
Last active September 29, 2021 15:53
CUDA ReverseArray (TASK_05)
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <chrono>
#include <cstdlib>
#include <iostream>
__global__ void reverseKernel(float* A, int N) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
@AxoyTO
AxoyTO / Stencil_1D.cu
Last active September 29, 2021 20:40
CUDA Stencil_1D (TASK_08)
// UCSC CMPE220 Advanced Parallel Processing
// Prof. Heiner Leitz
// Author: Marcelo Siero.
// Modified from code by: Andreas Goetz (agoetz@sdsc.edu)
// CUDA program to perform 1D stencil operation in parallel on the GPU
//
// /* FIXME */ COMMENTS THAT REQUIRE ATTENTION
#include <cuda.h>
#include <device_launch_parameters.h>
@AxoyTO
AxoyTO / pinnedUnifiedMemory.cu
Created September 30, 2021 11:02
CUDA PinnedMemory+UnifiedMemory (TASK_09)
//
// main.cpp
//
//
// Created by Elijah Afanasiev on 25.09.2018.
//
//
// System includes
#include <assert.h>
@AxoyTO
AxoyTO / multiGPU.cu
Created September 30, 2021 15:43
CUDA MultiGPU (TASK_11)
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <stdio.h>
#include <cstdlib>
#include <iostream>
__global__ void vectorAddGPU(float* a, float* b, float* c, int N) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
@AxoyTO
AxoyTO / gatherOptimization.cu
Last active October 1, 2021 09:48
CUDA "Gather" Optimization (TASK_06)
#include <cuda_profiler_api.h>
#include <cfloat>
#include <chrono>
#include <iostream>
using namespace std;
///////////////////////////////////////////////////////////////////////////////////////////////////////////
cudaError_t SAFE_CALL(cudaError_t result) {
@AxoyTO
AxoyTO / streams.cu
Created September 30, 2021 18:28
CUDA Streams (TASK_10)
//
// main.cpp
//
//
// Created by Elijah Afanasiev on 25.09.2018.
//
//
// System includes
#include <assert.h>
@AxoyTO
AxoyTO / matrixMult.cu
Last active September 30, 2021 19:17
CUDA CUBLAS/CURAND/THRUST MatrixMultiplication (TASK_12)
// High level matrix multiplication on GPU using CUDA with Thrust, CURAND and
// CUBLAS C(m,n) = A(m,k) * B(k,n)
#include <cublas_v2.h>
#include <curand.h>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>