Skip to content

Instantly share code, notes, and snippets.

@ZFTurbo
Created June 11, 2024 13:28
Show Gist options
  • Save ZFTurbo/08f84764ecd070249235c74882065809 to your computer and use it in GitHub Desktop.
Save ZFTurbo/08f84764ecd070249235c74882065809 to your computer and use it in GitHub Desktop.
PyCuda image resize
# coding: utf-8
__author__ = 'ZFTurbo: https://github.com/ZFTurbo/'
if __name__ == '__main__':
    # Select the GPU before the `pycuda.autoinit` import further down runs,
    # so the device-selection variables are already in the environment when
    # the CUDA context is created.
    import os

    gpu_use = "0"
    print('GPU use: {}'.format(gpu_use))
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "{}".format(gpu_use),
    })
import os
import glob
import pycuda.autoinit
import pycuda.driver as cuda
import cv2
from pycuda.compiler import SourceModule
import numpy as np
def show_image(im, name='image'):
    """Show an image in an OpenCV window and block until a key is pressed.

    Args:
        im: Image data (array-like). Anything that is not already uint8 is
            saturated to the range [0, 255] and then truncated to uint8, so
            out-of-range values do not wrap around when displayed.
        name: Title of the window.

    Raises:
        ValueError: If ``im`` is None.
    """
    if im is None:
        raise ValueError('show_image() got None instead of an image')

    frame = np.asarray(im)
    if frame.dtype != np.uint8:
        # Clip in float64 so the bounds are valid for every input dtype,
        # then cast; a bare astype(uint8) would wrap values outside 0..255.
        frame = np.clip(frame.astype(np.float64), 0.0, 255.0).astype(np.uint8)

    cv2.imshow(name, frame)
    cv2.waitKey(0)  # wait indefinitely for a key press
    cv2.destroyAllWindows()
# CUDA C source for the resize kernel. Each thread writes one output pixel
# (three interleaved channels) by copying the source pixel at
# floor(x * input_width / output_width), floor(y * input_height / output_height)
# and converting its unsigned 8-bit channels to float.
kernel_code = """
__global__ void resizeImage(const unsigned char* input, float* output, int input_width, int input_height, int output_width, int output_height)
{
    // Output pixel handled by this thread.
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid may be larger than the image; surplus threads do nothing.
    if (x < output_width && y < output_height)
    {
        float scale_x = (float)input_width / output_width;
        float scale_y = (float)input_height / output_height;

        int src_x = (int)(x * scale_x);
        int src_y = (int)(y * scale_y);

        // Keep the source coordinate inside the input image even if the
        // float multiplication rounds up to input_width / input_height.
        if (src_x > input_width - 1) src_x = input_width - 1;
        if (src_y > input_height - 1) src_y = input_height - 1;

        // Three values per pixel, stored contiguously.
        int src_index = (src_y * input_width + src_x) * 3;
        int dst_index = (y * output_width + x) * 3;

        output[dst_index] = (float)input[src_index];
        output[dst_index + 1] = (float)input[src_index + 1];
        output[dst_index + 2] = (float)input[src_index + 2];
    }
}
"""
# Compile the CUDA source once at import time and fetch the kernel handle.
mod = SourceModule(kernel_code)
resize_func = mod.get_function("resizeImage")

# Launch geometry: 16x16 threads per block, and enough blocks (rounded up)
# to span 320 threads along both x and y.
block_size = (16, 16, 1)
grid_size = tuple(-(-320 // threads) for threads in block_size[:2]) + (1,)
if __name__ == '__main__':
    # Demo: load an image, resize it to 320x180 on the GPU, and display the
    # original followed by the resized result.
    img_path = 'example.png'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable/missing file by returning None.
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    show_image(img)

    inp_h, inp_w = img.shape[:2]
    out_w, out_h = 320, 180

    # Host buffer for the result; the kernel writes float32, 3 values/pixel.
    img_resized = np.zeros((out_h, out_w, 3), dtype=np.float32)

    # Size the launch grid from the actual output dimensions (rounded up to
    # whole blocks) so no block rows are launched beyond the image.
    launch_grid = (
        (out_w + block_size[0] - 1) // block_size[0],
        (out_h + block_size[1] - 1) // block_size[1],
        1,
    )

    img_gpu = cuda.mem_alloc(img.nbytes)
    resized_img_gpu = cuda.mem_alloc(img_resized.nbytes)
    try:
        cuda.memcpy_htod(img_gpu, img)
        resize_func(
            img_gpu,
            resized_img_gpu,
            np.int32(inp_w),
            np.int32(inp_h),
            np.int32(out_w),
            np.int32(out_h),
            block=block_size,
            grid=launch_grid,
        )
        cuda.memcpy_dtoh(img_resized, resized_img_gpu)
    finally:
        # Release device memory explicitly instead of waiting for GC.
        img_gpu.free()
        resized_img_gpu.free()

    print(img_resized.shape)
    show_image(img_resized)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment