Skip to content

Instantly share code, notes, and snippets.

View alphaville's full-sized avatar
:octocat:
(De)coding

Pantelis Sopasakis alphaville

:octocat:
(De)coding
View GitHub Profile
/* FILE: cuda_timer.cuh */
#include <cuda_runtime.h>
#include "error_handles.cuh"
/*
 * File-scope timer state. Because these are declared `static` in a header
 * (cuda_timer.cuh), every translation unit that includes it gets its own
 * private copy of each variable.
 */
/* CUDA event handles; presumably recorded at the beginning and end of the
 * timed region -- the code that uses them is not visible here (TODO confirm). */
static cudaEvent_t start;
static cudaEvent_t stop;
/* State flags, both initialised to 0. Presumably toggled by the timer
 * routines to track whether timing is in progress -- verify against them. */
static short timer_running = 0;
static short tic_called = 0;
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <assert.h>
#include "helper_cuda.h"
#include "rand_data.h"
#include <math.h>
#define BLOCK_SIZE 16
template<typename T>
__global__ void matvec_kernel(
const T * dA,
const T * dx,
T * dy,
const unsigned int * nRows,
const unsigned int * nx)
{
unsigned int bid = blockIdx.x;
/* Auto-generated file (by a MATLAB script) - ID 3062321795 */
#ifndef __RAND_DATA_TURBO__
#define __RAND_DATA_TURBO__
#define NS 32
__device__ float dev_x[32] =
{ 9.320295340189, 8.666071467758, 2.064062819139, 5.206786781683, 9.887525026886, 9.196139126590, 2.988093813008, 9.115010659100, 4.994307851747, 0.409423900211,
1.911173066863, 1.515824153316, 5.797111859887, 1.448878331722, 8.210170746106, 6.347733121928, 7.880805428768, 0.672098319650, 5.452277295227, 9.969671220601,
int main(void) {
checkCudaErrors(cudaSetDeviceFlags(cudaDeviceMapHost));
float * dev_ptr_A = NULL;
float * dev_ptr_x = NULL;
float * hst_y = NULL;
float * address_y = NULL;
checkCudaErrors(
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include "cublas_v2.h"
#include "helper_cuda.h"
#define ns 6
/*
 * Two statically initialised, six-element float arrays declared with the
 * __device__ specifier. dev_static_x holds 0, 1, ..., 5 and dev_static_y
 * holds 0, -1, ..., -5.
 */
__device__ float dev_static_x[6] = {
    0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
};
__device__ float dev_static_y[6] = {
    0.0f, -1.0f, -2.0f, -3.0f, -4.0f, -5.0f
};
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include "cublas_v2.h"
#include "helper_cuda.h"
#define ns 1e8
int main(void) {
cudaDeviceProp prop;
int whichDevice;
checkCudaErrors( cudaGetDevice(&whichDevice));
checkCudaErrors( cudaGetDeviceProperties(&prop, whichDevice));
if (prop.canMapHostMemory != 1){
fprintf(stderr, "Device cannot map memory!\n");
return 1;
}
int main(void) {
int * host_p; /*< Host data allocated as pinned memory */
int * dev_ptr_p; /*< this pointer resides on the host */
int ns = 32;
int data_size = ns * sizeof(int);
checkCudaErrors(
cudaHostAlloc((void**) &host_p, data_size, cudaHostAllocMapped));
#include <stdio.h>
#include <cuda_runtime.h>
#include "helper_cuda.h"
/*
 * Minimal demonstration kernel: every thread adds 10, in place, to the
 * element of d_var selected by its own threadIdx.x.
 *
 * d_var - int array updated in place. It is indexed by threadIdx.x only
 *         (blockIdx is not used) and there is no bounds check, so it must
 *         hold at least blockDim.x elements. If launched with more than one
 *         block, threads of different blocks update the same elements.
 */
__global__ void kernel(int *d_var) {
  const unsigned int slot = threadIdx.x;
  d_var[slot] = d_var[slot] + 10;
}
int * host_p;
int * host_result;
int * dev_p;