Created
September 2, 2017 07:39
-
-
Save gyu-don/fa5e5f75e817b31db97b8f4e5ce0b129 to your computer and use it in GitHub Desktop.
CUDAの練習。100万人ビンゴ大会
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <inttypes.h> | |
#include <stdlib.h> | |
#include <cuda_runtime.h> | |
#include <curand_kernel.h> | |
// Order in which the 75 bingo numbers are called. main() fills it from the
// host with cudaMemcpyToSymbol; the bingo kernel reads seq[i] on turn i.
__constant__ uint8_t seq[75];
/*
 * Fill `card` (25 cells) with a random bingo card for the calling thread.
 *
 * The generator is initialised with the fixed seed 12345 and the thread's
 * global index as the cuRAND sequence number.
 *
 * Layout: cells 5*g .. 5*g+4 receive five distinct numbers taken from
 * 15*g+1 .. 15*g+15 for g = 0..4. Cell 12 is then overwritten with 0, the
 * value the game uses for "already marked".
 */
__device__ void make_card(uint8_t *card)
{
    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;

    curandState rng;
    curand_init(12345, tid, 0, &rng);

    // Offsets 1..15 that are added to each group's base value. The pool is
    // deliberately not reset between groups: it always stays a permutation
    // of 1..15, which is all the draw below needs.
    uint8_t pool[15] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };

    for (int g = 0; g < 5; ++g) {
        const int base = 15 * g;
        uint8_t *cells = card + 5 * g;

        for (int n = 0; n < 5; ++n) {
            // Draw from the first (last + 1) pool entries only; entries
            // behind `last` were already used for this group.
            const int last = 14 - n;
            const uint8_t pick = curand(&rng) % (last + 1);
            cells[n] = base + pool[pick];

            // Move the drawn offset out of the remaining range. The fifth
            // draw is the final one of the group, so no swap is performed.
            if (n < 4) {
                const uint8_t tmp = pool[pick];
                pool[pick] = pool[last];
                pool[last] = tmp;
            }
        }
    }

    card[12] = 0;
}
/*
 * Kernel: each thread builds its own card with make_card() and plays it
 * against the call order in `seq`.
 *
 * result: array of at least 75 counters. On the turn i (0-based) at which
 *         the thread's card first completes a line, result[i] is
 *         incremented atomically and the thread exits.
 *
 * A marked cell is stored as 0, so a line is complete when all of its
 * cells are 0.
 */
__global__ void bingo(unsigned int *result)
{
    uint8_t card[25];
    make_card(card);

    for (int turn = 0; turn < 75; ++turn) {
        const uint8_t called = seq[turn];

        // The called number can only sit in the five-cell group that
        // covers its range of 15 values.
        uint8_t *group = &card[((called - 1) / 15) * 5];

        int hit = 0;
        while (hit < 5 && group[hit] != called) {
            ++hit;
        }
        if (hit == 5) {
            continue;   // number is not on this card
        }
        group[hit] = 0;

        // Line formed by the five cells of the group itself.
        const bool group_done =
            (group[0] | group[1] | group[2] | group[3] | group[4]) == 0;
        // Line formed by the cell at the same position in every group.
        const bool cross_done =
            (card[hit] | card[5 + hit] | card[10 + hit] |
             card[15 + hit] | card[20 + hit]) == 0;
        // Diagonals; cell 12 is omitted from both checks.
        const bool diag_down_done =
            (card[0] | card[6] | card[18] | card[24]) == 0;
        const bool diag_up_done =
            (card[4] | card[8] | card[16] | card[20]) == 0;

        if (group_done || cross_done || diag_down_done || diag_up_done) {
            atomicAdd(&result[turn], 1);
            return;
        }
    }
}
/* Print a diagnostic and terminate if a CUDA runtime call did not succeed. */
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Host driver: shuffles the call order 1..75, uploads it to `seq`, runs the
 * bingo kernel with N_BLOCK * N_THREAD players and prints, for every turn,
 * how many players completed their first line on that turn.
 *
 * Every CUDA call is checked so that a failed allocation, copy or kernel
 * launch is reported instead of silently producing a table of zeros.
 */
int main(void)
{
    const unsigned int N_THREAD = 1024;
    const unsigned int N_BLOCK = 1024;

    // Fixed seed: the call order is the same on every run.
    srand(12345);
    uint8_t seq_host[75];
    for (int i = 0; i < 75; i++) seq_host[i] = i + 1;
    // Fisher-Yates shuffle: swap position i with a random position in [i, 74].
    for (int i = 0; i < 75; i++) {
        int t = rand() % (75 - i);
        uint8_t sw = seq_host[i];
        seq_host[i] = seq_host[i + t];
        seq_host[i + t] = sw;
    }
    check_cuda(cudaMemcpyToSymbol(seq, seq_host, sizeof(seq_host)),
               "cudaMemcpyToSymbol(seq)");

    unsigned int *result = NULL;
    unsigned int host_result[75] = {};
    check_cuda(cudaMalloc((void**)&result, sizeof(host_result)), "cudaMalloc(result)");
    check_cuda(cudaMemset(result, 0, sizeof(host_result)), "cudaMemset(result)");

    bingo<<<N_BLOCK, N_THREAD>>>(result);
    // A bad launch configuration is only visible through cudaGetLastError().
    check_cuda(cudaGetLastError(), "bingo kernel launch");

    // Blocking copy: waits for the kernel and surfaces errors raised while it ran.
    check_cuda(cudaMemcpy(host_result, result, sizeof(host_result), cudaMemcpyDeviceToHost),
               "cudaMemcpy(result)");
    check_cuda(cudaFree(result), "cudaFree(result)");

    for (int i = 0; i < 75; i++) {
        printf("%2d\t: %6u\n", i + 1, host_result[i]);
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment