Last active
October 19, 2017 14:33
-
-
Save belltailjp/f33f3bafb70865ec13fc314a632a4928 to your computer and use it in GitHub Desktop.
Draw 2D Gaussian Image using Cupy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
def measure_time_in_ms(func, n_times, size, sigma): | |
start = time.time() | |
for _ in range(n_times): | |
out = func(size, sigma) | |
assert(out.shape == (size, size)) | |
elapsed_sec = time.time() - start | |
return elapsed_sec / n_times * 1000 | |
sizes = [2 ** t for t in range(3, 15)] | |
n_times = 50 | |
print('size numpy cupy cupy_eltw numpy_sendtoGPU') | |
for size in sizes: | |
# Kernel build will run at the first call, so exclude it from time measurement | |
gen_2d_gaussian_cupy(size, size / 5) | |
gen_2d_gaussian_cupy_elementwise(size, size / 5) | |
numpy_ms = measure_time_in_ms(gen_2d_gaussian_numpy, n_times, size, size / 5) | |
cupy_ms = measure_time_in_ms(gen_2d_gaussian_cupy, n_times, size, size / 5) | |
cupy_eltw_ms = measure_time_in_ms(gen_2d_gaussian_cupy_elementwise, n_times, size, size / 5) | |
numpy_togpu_ms = measure_time_in_ms(gen_2d_gaussian_numpy_sendtoGPU, n_times, size, size / 5) | |
print(f'{size} {numpy_ms} {cupy_ms} {cupy_eltw_ms} {numpy_togpu_ms}') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <chrono> | |
__global__ void gen_2d_gaussian_kernel(float *buf, int size, float sigma, int cx, int cy) | |
{ | |
const int x = blockDim.x * blockIdx.x + threadIdx.x; | |
const int y = blockDim.y * blockIdx.y + threadIdx.y; | |
if(x < 0 || size <= x || y < 0 || size <= y) return; | |
const int dx = x - cx, dy = y - cy; | |
const float d = sqrtf(dx * dx + dy * dy); | |
buf[y * size + x] = expf(-d * d / (2 * sigma * sigma)); | |
} | |
void gen_2d_gaussian(float *buf_gpu, int size, float sigma) | |
{ | |
cudaStream_t stream; | |
cudaStreamCreate(&stream); | |
if(size <= 32) | |
{ | |
dim3 threads(size, size); | |
gen_2d_gaussian_kernel<<<1, threads, 0, stream>>>(buf_gpu, size, sigma, size / 2, size / 2); | |
}else{ | |
dim3 threads(32, 32); | |
dim3 block(size / 32, size / 32); | |
gen_2d_gaussian_kernel<<<block, threads, 0, stream>>>(buf_gpu, size, sigma, size / 2, size / 2); | |
} | |
cudaStreamSynchronize(stream); | |
cudaStreamDestroy(stream); | |
} | |
double measure_time_in_ms(int size, int n_times, float sigma) | |
{ | |
auto start = std::chrono::high_resolution_clock::now(); | |
for(int i = 0; i < n_times; ++i) | |
{ | |
float *buf; | |
cudaMalloc(&buf, sizeof(float) * size * size); | |
gen_2d_gaussian(buf, size, sigma); | |
cudaFree(buf); | |
} | |
auto duration = std::chrono::high_resolution_clock::now() - start; | |
return std::chrono::duration_cast<std::chrono::microseconds>(duration).count() / 1000.0 / n_times; | |
} | |
int main() | |
{ | |
for(int size = 8; size <= 16384; size *= 2) | |
{ | |
measure_time_in_ms(size, 100, size / 5.0f); // To eliminate first kernel call overhead | |
std::cout << size << " " << measure_time_in_ms(size, 50, size / 5.0f) << "ms" << std::endl; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cupy | |
def gen_2d_gaussian_cupy(size, sigma): | |
x = cupy.arange(0, size, dtype=cupy.float32) | |
gaussian_1d = cupy.exp(-(x - size / 2)**2.0 / (2 * sigma**2)) | |
return cupy.outer(gaussian_1d, gaussian_1d) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cupy | |
# Create the instance only once, in order to prevent building kernel every time the function is called | |
kernel = cupy.ElementwiseKernel('T x, T y, T cx, T cy, T sigma', | |
'T out', | |
''' | |
T dx = x - cx, dy = y - cy; | |
T d = sqrtf(dx * dx + dy * dy); | |
out = expf(-d * d / (2 * sigma * sigma)); | |
''', | |
'gen_2d_gaussian') | |
def gen_2d_gaussian_cupy_elementwise(size, sigma): | |
rng = cupy.arange(size, dtype=cupy.float32) | |
x, y = cupy.meshgrid(rng, rng) | |
return kernel(x, y, size / 2, size / 2, sigma) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy, scipy.signal | |
def gen_2d_gaussian_numpy(size, sigma): | |
ar = scipy.signal.gaussian(size, std=sigma).astype(numpy.float32) | |
return numpy.outer(ar, ar) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment