Created
August 16, 2016 18:03
-
-
Save sklam/2ff89e40721d1f1a007449f02aee3990 to your computer and use it in GitHub Desktop.
Numba, PyCUDA, OpenGL interop. Adapted from https://wiki.tiker.net/PyCuda/Examples/GlInterop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GL interoperability example, by Peter Berrington. | |
# Draws a rotating teapot, using cuda to invert the RGB value | |
# each frame | |
from OpenGL.GL import * | |
from OpenGL.GLUT import * | |
from OpenGL.GLU import * | |
from OpenGL.GL.ARB.vertex_buffer_object import * | |
from OpenGL.GL.ARB.pixel_buffer_object import * | |
import numpy, sys, time | |
import pycuda.driver as cuda_driver | |
import pycuda.gl as cuda_gl | |
from pycuda.compiler import SourceModule | |
from numba import cuda as nbcuda | |
import ctypes | |
#this is all munged together from the CUDA SDK postprocessGL example. | |
initial_size = 512,512 | |
current_size = initial_size | |
animate = True | |
enable_cuda = True | |
window = None # Number of the glut window. | |
time_of_last_draw = 0.0 | |
time_of_last_titleupdate = 0.0 | |
frames_per_second = 0.0 | |
frame_counter = 0 | |
output_texture = None # pointer to offscreen render target | |
(source_pbo, dest_pbo, cuda_module, invert, | |
pycuda_source_pbo, pycuda_dest_pbo) = [None]*6 | |
heading,pitch,bank = [0.0]*3 | |
class ExternalMemory(object): | |
""" | |
Provide an externally managed memory. | |
Interface requirement: __cuda_memory__, device_ctypes_pointer, _cuda_memize_ | |
""" | |
__cuda_memory__ = True | |
def __init__(self, ptr, size): | |
self.device_ctypes_pointer = ctypes.c_void_p(ptr) | |
self._cuda_memsize_ = size | |
def create_PBOs(w,h): | |
global source_pbo, dest_pbo, pycuda_source_pbo, pycuda_dest_pbo | |
num_texels = w*h | |
data = numpy.zeros((num_texels,4),numpy.uint8) | |
source_pbo = glGenBuffers(1) | |
glBindBuffer(GL_ARRAY_BUFFER, source_pbo) | |
glBufferData(GL_ARRAY_BUFFER, data, GL_DYNAMIC_DRAW) | |
glBindBuffer(GL_ARRAY_BUFFER, 0) | |
pycuda_source_pbo = cuda_gl.BufferObject(long(source_pbo)) | |
dest_pbo = glGenBuffers(1) | |
glBindBuffer(GL_ARRAY_BUFFER, dest_pbo) | |
glBufferData(GL_ARRAY_BUFFER, data, GL_DYNAMIC_DRAW) | |
glBindBuffer(GL_ARRAY_BUFFER, 0) | |
pycuda_dest_pbo = cuda_gl.BufferObject(long(dest_pbo)) | |
def destroy_PBOs(): | |
global source_pbo, dest_pbo, pycuda_source_pbo, pycuda_dest_pbo | |
for pbo in [source_pbo, dest_pbo]: | |
glBindBuffer(GL_ARRAY_BUFFER, long(pbo)) | |
glDeleteBuffers(1, long(pbo)); | |
glBindBuffer(GL_ARRAY_BUFFER, 0) | |
source_pbo,dest_pbo,pycuda_source_pbo,pycuda_dest_pbo = [None]*4 | |
def create_texture(w,h): | |
global output_texture | |
output_texture = glGenTextures(1) | |
glBindTexture(GL_TEXTURE_2D, output_texture) | |
# set basic parameters | |
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE) | |
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE) | |
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST) | |
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST) | |
# buffer data | |
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, | |
w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, None) | |
def destroy_texture(): | |
global output_texture | |
glDeleteTextures(output_texture); | |
output_texture = None | |
def init_gl(): | |
Width, Height = current_size | |
glClearColor(0.1, 0.1, 0.5, 1.0) | |
glDisable(GL_DEPTH_TEST) | |
glViewport(0, 0, Width, Height) | |
glMatrixMode(GL_PROJECTION); | |
glLoadIdentity(); | |
gluPerspective(60.0, Width/float(Height), 0.1, 10.0) | |
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL) | |
glEnable(GL_LIGHT0) | |
red = ( 1.0, 0.1, 0.1, 1.0 ) | |
white = ( 1.0, 1.0, 1.0, 1.0 ) | |
glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, red ) | |
glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, white) | |
glMaterialf( GL_FRONT_AND_BACK, GL_SHININESS, 60.0) | |
def resize(Width, Height): | |
global current_size | |
current_size = Width, Height | |
glViewport(0, 0, Width, Height) # Reset The Current Viewport And Perspective Transformation | |
glMatrixMode(GL_PROJECTION) | |
glLoadIdentity() | |
gluPerspective(60.0, Width/float(Height), 0.1, 10.0) | |
def do_tick(): | |
global time_of_last_titleupdate, frame_counter, frames_per_second | |
if ((time.clock () * 1000.0) - time_of_last_titleupdate >= 1000.): | |
frames_per_second = frame_counter # Save The FPS | |
frame_counter = 0 # Reset The FPS Counter | |
szTitle = "%d FPS" % (frames_per_second ) | |
glutSetWindowTitle ( szTitle ) | |
time_of_last_titleupdate = time.clock () * 1000.0 | |
frame_counter += 1 | |
# The function called whenever a key is pressed. Note the use of Python tuples to pass in: (key, x, y) | |
def keyPressed(*args): | |
global animate, enable_cuda | |
# If escape is pressed, kill everything. | |
if args[0] == '\033': | |
print ('Closing..') | |
destroy_PBOs() | |
destroy_texture() | |
exit() | |
elif args[0] == 'a': | |
print ('toggling animation') | |
animate = not animate | |
elif args[0] == 'e': | |
print ('toggling cuda') | |
enable_cuda = not enable_cuda | |
def idle(): | |
global heading, pitch, bank | |
if animate: | |
heading += 0.2 | |
pitch += 0.6 | |
bank += 1.0 | |
glutPostRedisplay() | |
def display(): | |
try: | |
render_scene() | |
if enable_cuda: | |
process_image() | |
display_image() | |
glutSwapBuffers() | |
except: | |
from traceback import print_exc | |
print_exc() | |
from os import _exit | |
_exit(0) | |
def process(width, height): | |
""" Use PyCuda """ | |
grid_dimensions = (width//16,height//16) | |
source_mapping = pycuda_source_pbo.map() | |
dest_mapping = pycuda_dest_pbo.map() | |
# invert.prepared_call(grid_dimensions, (16, 16, 1), | |
# source_mapping.device_ptr(), | |
# dest_mapping.device_ptr()) | |
shape = width * height * 4 | |
# get external memory for numba.cuda | |
source_ptr = ExternalMemory(source_mapping.device_ptr(), shape) | |
dest_ptr = ExternalMemory(dest_mapping.device_ptr(), shape) | |
# make them a device arrays | |
source_array = nbcuda.devicearray.DeviceNDArray(shape=shape, strides=(1,), | |
dtype=numpy.dtype('uint8'), | |
gpu_data=source_ptr) | |
dest_array = nbcuda.devicearray.DeviceNDArray(shape=shape, strides=(1,), | |
dtype=numpy.dtype('uint8'), | |
gpu_data=dest_ptr) | |
# call our kernel | |
invert[grid_dimensions, (16, 16)](source_array, dest_array) | |
cuda_driver.Context.synchronize() | |
source_mapping.unmap() | |
dest_mapping.unmap() | |
def process_image(): | |
""" copy image and process using CUDA """ | |
global pycuda_source_pbo,source_pbo,current_size, dest_pbo | |
image_width, image_height = current_size | |
assert source_pbo is not None | |
# tell cuda we are going to get into these buffers | |
pycuda_source_pbo.unregister() | |
# activate destination buffer | |
glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, long(source_pbo)) | |
# read data into pbo. note: use BGRA format for optimal performance | |
glReadPixels( | |
0, #start x | |
0, #start y | |
image_width, #end x | |
image_height, #end y | |
GL_BGRA, #format | |
GL_UNSIGNED_BYTE, #output type | |
ctypes.c_void_p(0)) | |
pycuda_source_pbo = cuda_gl.BufferObject(long(source_pbo)) | |
# run the Cuda kernel | |
process(image_width, image_height) | |
# blit convolved texture onto the screen | |
# download texture from PBO | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, long(dest_pbo)) | |
glBindTexture(GL_TEXTURE_2D, output_texture) | |
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, | |
image_width, image_height, | |
GL_BGRA, GL_UNSIGNED_BYTE, ctypes.c_void_p(0)) | |
def display_image(): | |
""" render a screen sized quad """ | |
glDisable(GL_DEPTH_TEST) | |
glDisable(GL_LIGHTING) | |
glEnable(GL_TEXTURE_2D) | |
glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE) | |
glMatrixMode(GL_PROJECTION) | |
glPushMatrix() | |
glLoadIdentity() | |
glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0) | |
glMatrixMode( GL_MODELVIEW) | |
glLoadIdentity() | |
glViewport(0, 0, current_size[0], current_size[1]) | |
glBegin(GL_QUADS) | |
glTexCoord2f(0.0, 0.0) | |
glVertex3f(-1.0, -1.0, 0.5) | |
glTexCoord2f(1.0, 0.0) | |
glVertex3f(1.0, -1.0, 0.5) | |
glTexCoord2f(1.0, 1.0) | |
glVertex3f(1.0, 1.0, 0.5) | |
glTexCoord2f(0.0, 1.0) | |
glVertex3f(-1.0, 1.0, 0.5) | |
glEnd() | |
glMatrixMode(GL_PROJECTION) | |
glPopMatrix() | |
glDisable(GL_TEXTURE_2D) | |
glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0) | |
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0) | |
def render_scene(): | |
glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)# Clear Screen And Depth Buffer | |
glMatrixMode(GL_MODELVIEW) | |
glLoadIdentity () # Reset The Modelview Matrix | |
glTranslatef(0.0, 0.0, -3.0); | |
glRotatef(heading, 1.0, 0.0, 0.0) | |
glRotatef(pitch , 0.0, 1.0, 0.0) | |
glRotatef(bank , 0.0, 0.0, 1.0) | |
glViewport(0, 0, current_size[0],current_size[1]) | |
glEnable(GL_LIGHTING) | |
glEnable(GL_DEPTH_TEST) | |
glDepthFunc(GL_LESS) | |
glutSolidTeapot(1.0) | |
do_tick()#just for fps display.. | |
return True | |
def main(): | |
global window, cuda_module, cuda_gl, cuda_driver, invert | |
glutInit(sys.argv) | |
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE | GLUT_ALPHA | GLUT_DEPTH) | |
glutInitWindowSize(*initial_size) | |
glutInitWindowPosition(0, 0) | |
window = glutCreateWindow("PyCuda GL Interop Example") | |
glutDisplayFunc(display) | |
glutIdleFunc(idle) | |
glutReshapeFunc(resize) | |
glutKeyboardFunc(keyPressed) | |
glutSpecialFunc(keyPressed) | |
init_gl() | |
# create texture for blitting to screen | |
create_texture(*initial_size) | |
#setup pycuda gl interop | |
import pycuda.gl.autoinit | |
import pycuda.gl | |
cuda_gl = pycuda.gl | |
cuda_driver = pycuda.driver | |
# cuda_module = SourceModule(""" | |
# __global__ void invert(unsigned char *source, unsigned char *dest) | |
# { | |
# int block_num = blockIdx.x + blockIdx.y * gridDim.x; | |
# int thread_num = threadIdx.y * blockDim.x + threadIdx.x; | |
# int threads_in_block = blockDim.x * blockDim.y; | |
# //Since the image is RGBA we multiply the index 4. | |
# //We'll only use the first 3 (RGB) channels though | |
# int idx = 4 * (threads_in_block * block_num + thread_num); | |
# dest[idx ] = 255 - source[idx ]; | |
# dest[idx+1] = 255 - source[idx+1]; | |
# dest[idx+2] = 255 - source[idx+2]; | |
# } | |
# """) | |
# invert = cuda_module.get_function("invert") | |
# # The argument "PP" indicates that the invert function will take two PBOs as arguments | |
# invert.prepare("PP") | |
# force compilation here | |
@nbcuda.jit("(uint8[::1], uint8[::1])") | |
def invert(source, dest): | |
block_num = nbcuda.blockIdx.x + nbcuda.blockIdx.y * nbcuda.gridDim.x | |
thread_num = nbcuda.threadIdx.y * nbcuda.blockDim.x + nbcuda.threadIdx.x | |
threads_in_block = nbcuda.blockDim.x * nbcuda.blockDim.y | |
# Since the image is RGBA we multiply the index 4. | |
# We'll only use the first 3 (RGB) channels though | |
idx = 4 * (threads_in_block * block_num + thread_num) | |
dest[idx] = 255 - source[idx] | |
dest[idx + 1] = 255 - source[idx + 1] | |
dest[idx + 2] = 255 - source[idx + 2] | |
# create source and destination pixel buffer objects for processing | |
create_PBOs(*initial_size) | |
glutMainLoop() | |
# Print message to console, and kick off the main to get it rolling. | |
if __name__ == "__main__": | |
print("Hit ESC key to quit, 'a' to toggle animation, and 'e' to toggle cuda") | |
main() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment