Skip to content

Instantly share code, notes, and snippets.

@hughperkins
Created May 28, 2016 22:19
Show Gist options
  • Save hughperkins/c6cce1308620dd1b9d66c621c2520556 to your computer and use it in GitHub Desktop.
Save hughperkins/c6cce1308620dd1b9d66c621c2520556 to your computer and use it in GitHub Desktop.
from neon.layers import Convolution
from neon.initializers import Gaussian
from neon.backends import gen_backend
import numpy as np
import pycuda.driver as cuda
#import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import time
# Micro-benchmark: time the forward pass (fprop) of one 3x3 neon Convolution
# layer on the GPU and print the wall-clock time of each trial.

# Problem size.
image_size = 224
batch_size = 128
input_filters = 32
output_filters = 32

# Benchmark shape: each printed timing covers `fprops_per_trial` forward passes.
num_trials = 10
fprops_per_trial = 10

# Create the neon GPU backend before any layer is built. The explicit
# `pycuda.autoinit` import is commented out at the top of the file, so this
# call is presumably what sets up the CUDA context -- TODO confirm.
gen_backend(backend='gpu', batch_size=batch_size,
            datatype=np.float32, device_id=0)

init = Gaussian()
conv = Convolution((3, 3, output_filters), strides=1, padding=1, init=init)

# Input is laid out as (channels, height, width, batch). A batch-first
# (batch, height, width, channels) ordering was tried previously and dropped;
# presumably batch-last is what neon expects -- verify against neon docs.
input_shape = (input_filters, image_size, image_size, batch_size)
inputs = np.random.randn(*input_shape).astype(np.float32)
inputs_cuda = gpuarray.to_gpu(inputs)

conv.configure((input_filters, image_size, image_size))
print('configure done')
conv.allocate()
print('allocate done')

# Warm-up pass so one-time setup cost is not attributed to the first trial.
# GPU work may still be in flight when fprop returns, so drain it before the
# first timer starts; otherwise trial 0 would also pay for the warm-up.
conv.fprop(inputs_cuda)
cuda.Context.synchronize()

for _ in range(num_trials):
    start = time.time()
    for _ in range(fprops_per_trial):
        conv.fprop(inputs_cuda)
    # Wait for all queued GPU work to finish before reading the clock.
    cuda.Context.synchronize()
    print('time=', time.time() - start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment