nwh · February 19, 2015 01:07
diff --git a/output b/output
 kernel_1: thread 0 writing 0 to shared memory.
 kernel_1: thread 1 writing 1 to shared memory.
 kernel_1: thread 2 writing 2 to shared memory.
 kernel_1: thread 3 writing 3 to shared memory.
 kernel_1: thread 4 writing 4 to shared memory.
 kernel_2: thread 0 reading 0 from shared memory.
 kernel_2: thread 1 reading 1 from shared memory.
 kernel_2: thread 2 reading 2 from shared memory.
 kernel_2: thread 3 reading 3 from shared memory.
 kernel_2: thread 4 reading 4 from shared memory.
 --- end of persisten-shared-mem ---
diff --git a/persistent-shared-mem.cu b/persistent-shared-mem.cu
 #include <iostream>
 #include <cstdio>

 // Demo kernel: every thread stores its global thread index into its own slot
 // of the dynamically sized shared array and prints what it stored.
 // Launch requirement: at least blockDim.x * sizeof(int) bytes of dynamic
 // shared memory, because the array is indexed by threadIdx.x.
 __global__
 void kernel_1() {
  extern __shared__ int smem[];
  // Global index of this thread across the whole 1D grid.
  const int gid = threadIdx.x + blockIdx.x * blockDim.x;
  smem[threadIdx.x] = gid;
  printf("kernel_1: thread %d writing %d to shared memory.\n", gid, gid);
 }

 // Demo kernel: every thread prints the int found in its own slot of the
 // dynamically sized shared array. Nothing in this kernel writes smem[], so the
 // printed value is whatever the slot holds when the kernel starts; shared
 // memory is uninitialized, so the code itself does not establish that value.
 // Launch requirement: at least blockDim.x * sizeof(int) bytes of dynamic
 // shared memory, because the array is indexed by threadIdx.x.
 __global__
 void kernel_2() {
  extern __shared__ int smem[];
  const int observed = smem[threadIdx.x];
  // Global index of this thread across the whole 1D grid.
  const int gid = threadIdx.x + blockIdx.x * blockDim.x;
  printf("kernel_2: thread %d reading %d from shared memory.\n", gid, observed);
 }

 // Prints a diagnostic on stderr when a CUDA runtime call did not succeed.
 // Returns true when `status` is cudaSuccess, false otherwise.
 static bool cuda_ok(cudaError_t status, const char* what) {
  if (status == cudaSuccess) return true;
  std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
  return false;
 }

 // Host driver for the demo: launches kernel_1 and then kernel_2 with the same
 // configuration (1 block of 5 threads, one int of dynamic shared memory per
 // thread), waits for the device to finish, and prints a closing banner.
 // Returns 0 on success and 1 if a launch or the synchronization reports an error.
 int main() {
  using std::cout;
  using std::endl;

  int num_block = 1;
  int num_thread = 5;
  // kernel_1 and kernel_2 index smem[] by threadIdx.x, so reserve one int per thread.
  size_t smem_bytes = num_thread * sizeof(int);

  // A launch itself returns nothing; configuration errors are only reported
  // through cudaGetLastError(), so query it after each launch.
  kernel_1<<<num_block,num_thread,smem_bytes>>>();
  if (!cuda_ok(cudaGetLastError(), "kernel_1 launch")) return 1;
  kernel_2<<<num_block,num_thread,smem_bytes>>>();
  if (!cuda_ok(cudaGetLastError(), "kernel_2 launch")) return 1;

  // Faults raised while the kernels run surface at the next synchronizing call.
  if (!cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) return 1;

  cout << "--- end of persisten-shared-mem ---" << endl;

  return 0;
 }
diff --git a/shared-memory.md b/shared-memory.md
	kernel_1: thread 0 writing 0 to shared memory.
	kernel_1: thread 1 writing 1 to shared memory.
	kernel_1: thread 2 writing 2 to shared memory.
	kernel_1: thread 3 writing 3 to shared memory.
	kernel_1: thread 4 writing 4 to shared memory.
	kernel_2: thread 0 reading 0 from shared memory.
	kernel_2: thread 1 reading 1 from shared memory.
	kernel_2: thread 2 reading 2 from shared memory.
	kernel_2: thread 3 reading 3 from shared memory.
	kernel_2: thread 4 reading 4 from shared memory.
	--- end of persisten-shared-mem ---
	#include <iostream>
	#include <cstdio>

	// Demo kernel: every thread stores its global thread index into its own slot
	// of the dynamically sized shared array and prints what it stored.
	// Launch requirement: at least blockDim.x * sizeof(int) bytes of dynamic
	// shared memory, because the array is indexed by threadIdx.x.
	__global__
	void kernel_1() {
	  extern __shared__ int smem[];
	  // Global index of this thread across the whole 1D grid.
	  const int gid = threadIdx.x + blockIdx.x * blockDim.x;
	  smem[threadIdx.x] = gid;
	  printf("kernel_1: thread %d writing %d to shared memory.\n", gid, gid);
	}

	// Demo kernel: every thread prints the int found in its own slot of the
	// dynamically sized shared array. Nothing in this kernel writes smem[], so the
	// printed value is whatever the slot holds when the kernel starts; shared
	// memory is uninitialized, so the code itself does not establish that value.
	// Launch requirement: at least blockDim.x * sizeof(int) bytes of dynamic
	// shared memory, because the array is indexed by threadIdx.x.
	__global__
	void kernel_2() {
	  extern __shared__ int smem[];
	  const int observed = smem[threadIdx.x];
	  // Global index of this thread across the whole 1D grid.
	  const int gid = threadIdx.x + blockIdx.x * blockDim.x;
	  printf("kernel_2: thread %d reading %d from shared memory.\n", gid, observed);
	}

	// Prints a diagnostic on stderr when a CUDA runtime call did not succeed.
	// Returns true when `status` is cudaSuccess, false otherwise.
	static bool cuda_ok(cudaError_t status, const char* what) {
	  if (status == cudaSuccess) return true;
	  std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
	  return false;
	}

	// Host driver for the demo: launches kernel_1 and then kernel_2 with the same
	// configuration (1 block of 5 threads, one int of dynamic shared memory per
	// thread), waits for the device to finish, and prints a closing banner.
	// Returns 0 on success and 1 if a launch or the synchronization reports an error.
	int main() {
	  using std::cout;
	  using std::endl;

	  int num_block = 1;
	  int num_thread = 5;
	  // kernel_1 and kernel_2 index smem[] by threadIdx.x, so reserve one int per thread.
	  size_t smem_bytes = num_thread * sizeof(int);

	  // A launch itself returns nothing; configuration errors are only reported
	  // through cudaGetLastError(), so query it after each launch.
	  kernel_1<<<num_block,num_thread,smem_bytes>>>();
	  if (!cuda_ok(cudaGetLastError(), "kernel_1 launch")) return 1;
	  kernel_2<<<num_block,num_thread,smem_bytes>>>();
	  if (!cuda_ok(cudaGetLastError(), "kernel_2 launch")) return 1;

	  // Faults raised while the kernels run surface at the next synchronizing call.
	  if (!cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) return 1;

	  cout << "--- end of persisten-shared-mem ---" << endl;

	  return 0;
	}