Skip to content

Instantly share code, notes, and snippets.

@ClementPinard
Forked from zou3519/bench.py
Created June 12, 2018 11:25
Show Gist options
  • Save ClementPinard/54cb962b57951d1d5e10f5c23e1c9ff3 to your computer and use it in GitHub Desktop.
Save ClementPinard/54cb962b57951d1d5e10f5c23e1c9ff3 to your computer and use it in GitHub Desktop.
[pytorch] GridSampler CUDNN vs THCUNN performance comparison script
import time
import torch
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
from torch.autograd import Variable
def benchmark_shape(N, C, IH, IW, H, W, nrand, nrep):
    """
    Time the GPU grid-sample workload with cuDNN enabled, then disabled.

    Builds ``nrand`` random (input, grid) pairs and runs each pair ``nrep``
    times per backend, i.e. ``nrand * nrep`` trials per backend. The elapsed
    time of each backend is printed by ``benchmark_helper``.

    Args:
        N, C, IH, IW: the raw input tensors are created as (C, N, IH, IW);
            the workloads transpose the first two dimensions before use.
        H, W: the raw grid tensors are created as (H, N, W, 2).
        nrand: number of random (input, grid) pairs to generate.
        nrep: number of repetitions per pair.

    Raises:
        AssertionError: if cuDNN is not enabled on entry.
    """
    input_datas = [torch.randn(C, N, IH, IW) for _ in range(nrand)]
    grid_datas = [torch.randn(H, N, W, 2) for _ in range(nrand)]
    # Materialise the pairs: they are iterated once per backend, and a bare
    # zip() object on Python 3 would be exhausted after the first pass.
    datas = list(zip(input_datas, grid_datas))

    # CPU benchmark, disabled in the original script:
    # print("Running CPU benchmark")
    # benchmark_helper(workload_cpu, datas, nrep)

    # Fail fast instead of discovering a disabled cuDNN after the timed run.
    assert cudnn.enabled
    print("Running CUDNN benchmark")
    benchmark_helper(workload_cudnn, datas, nrep)

    cudnn.enabled = False
    try:
        print("Running THCUNN benchmark")
        benchmark_helper(workload_cuda, datas, nrep)
    finally:
        # Always restore the global flag, even if the benchmark raised.
        cudnn.enabled = True
def check_shapes(N, C, IH, IW, H, W):
    """
    Cross-check ``F.grid_sample`` on CPU, CUDA with cuDNN off, and CUDA with cuDNN on.

    Runs the forward pass on all three paths, then backpropagates the same
    random output gradient through each and compares outputs, input
    gradients and grid gradients pairwise via ``assertTensorsEqual``.

    Args:
        N, C, IH, IW: the input is created as (C, N, IH, IW) and transposed
            to (N, C, IH, IW), so it is a transposed view rather than a
            freshly allocated tensor.
        H, W: the grid is created as (H, N, W, 2) and transposed to
            (N, H, W, 2) in the same way.

    Raises:
        AssertionError: if the CPU output is not of size (N, C, H, W).
    """
    input_cpu = Variable(torch.randn(C, N, IH, IW).transpose(0, 1),
                         requires_grad=True)
    grid_cpu = Variable(torch.randn(H, N, W, 2).transpose(0, 1),
                        requires_grad=True)
    out_cpu = F.grid_sample(input_cpu, grid_cpu)
    assert out_cpu.size() == torch.Size([N, C, H, W])

    # Transpose back, copy to the GPU, then transpose again so the CUDA
    # tensors are transposed views just like the CPU ones.
    input_cuda = Variable(input_cpu.data.transpose(0, 1).cuda().transpose(0, 1),
                          requires_grad=True)
    grid_cuda = Variable(grid_cpu.data.transpose(0, 1).cuda().transpose(0, 1),
                         requires_grad=True)
    cudnn.enabled = False
    try:
        out_cuda = F.grid_sample(input_cuda, grid_cuda)
    finally:
        # Restore the global flag even if the forward pass raised.
        cudnn.enabled = True
    assertTensorsEqual(out_cpu, out_cuda)

    input_cudnn = Variable(input_cpu.data.transpose(0, 1).cuda().transpose(0, 1),
                           requires_grad=True)
    grid_cudnn = Variable(grid_cpu.data.transpose(0, 1).cuda().transpose(0, 1),
                          requires_grad=True)
    out_cudnn = F.grid_sample(input_cudnn, grid_cudnn)
    assertTensorsEqual(out_cpu, out_cudnn)

    # Backpropagate one shared random gradient through all three outputs.
    gradients = out_cpu.data.new(out_cpu.size()).normal_()
    out_cpu.backward(gradients)
    gradients_cuda = gradients.cuda()
    cudnn.enabled = False
    try:
        out_cuda.backward(gradients_cuda)
    finally:
        cudnn.enabled = True
    out_cudnn.backward(gradients_cuda)

    # The msg letter identifies which pairwise comparison reported an error.
    assertTensorsEqual(input_cpu.grad, input_cuda.grad, msg="A")
    assertTensorsEqual(input_cpu.grad, input_cudnn.grad, msg="B")
    assertTensorsEqual(input_cudnn.grad, input_cuda.grad, msg="C")
    assertTensorsEqual(grid_cpu.grad, grid_cuda.grad, msg="D")
    assertTensorsEqual(grid_cpu.grad, grid_cudnn.grad, msg="E")
    assertTensorsEqual(grid_cuda.grad, grid_cudnn.grad, msg="F")
def benchmark_helper(workload_fn, datas, nrep):
    """
    Run ``workload_fn`` ``nrep`` times on every data pair and print the elapsed time.

    Args:
        workload_fn: callable invoked as ``workload_fn(input_data, grid_data)``.
        datas: iterable of ``(input_data, grid_data)`` pairs.
        nrep: number of repetitions per pair.

    Returns:
        A list with the value returned by every ``workload_fn`` call, in
        call order.
    """
    def _sync():
        # CUDA kernels are launched asynchronously; wait for outstanding GPU
        # work so the wall-clock window covers the work actually done.
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    _sync()
    start = time.time()
    result = []
    for input_data, grid_data in datas:
        for _ in range(nrep):
            result.append(workload_fn(input_data, grid_data))
    _sync()
    end = time.time()
    print(end - start)
    return result
def assertTensorsEqual(a, b, prec=1e-5, msg=''):
    """
    Report when two same-sized tensors differ by more than ``prec`` anywhere.

    Despite the name, a value mismatch is only printed (``msg`` followed by
    the maximum absolute error); nothing is raised for it.

    Args:
        a, b: tensors to compare; both are moved to the GPU when CUDA is
            available, otherwise they are compared where they are.
        prec: largest tolerated absolute element-wise difference.
        msg: label printed before the error value on a mismatch.

    Raises:
        AssertionError: if ``a`` and ``b`` have different sizes.
    """
    assert a.size() == b.size()
    # Compare on a common device; fall back to the tensors' current device
    # when no GPU is present instead of failing inside .cuda().
    if torch.cuda.is_available():
        a = a.cuda()
        b = b.cuda()
    diff = a - b
    if diff.is_signed():
        diff = diff.abs()
    peak = diff.max()
    # .item() is the supported scalar accessor on newer PyTorch, where
    # indexing a 0-dim tensor with [0] raises; keep the old path as fallback.
    max_err = peak.item() if hasattr(peak, "item") else peak.data[0]
    if max_err > prec:
        print(msg)
        print("Error was " + str(max_err))
def workload_cpu(input_data, grid_data):
    """
    Run one grid-sample forward and backward pass on the CPU.

    Both arguments have their first two dimensions swapped before being
    wrapped as gradient-requiring Variables; the backward pass is driven by
    a random gradient of the output's size. Returns nothing.
    """
    sample_input = Variable(input_data.transpose(0, 1), requires_grad=True)
    sample_grid = Variable(grid_data.transpose(0, 1), requires_grad=True)
    sampled = F.grid_sample(sample_input, sample_grid)
    random_grads = sampled.data.new(sampled.size()).normal_()
    sampled.backward(random_grads)
    # Drop the references explicitly, in the same order as before.
    del sample_input, sample_grid, sampled
def workload_cudnn(input_data, grid_data):
    """Run the GPU workload; asserts that cuDNN is currently enabled."""
    assert cudnn.enabled
    workload_cuda_helper(input_data, grid_data)
def workload_cuda(input_data, grid_data):
    """Run the GPU workload; asserts that cuDNN is currently disabled."""
    assert not cudnn.enabled
    workload_cuda_helper(input_data, grid_data)
def workload_cuda_helper(input_data, grid_data):
    """
    Run one grid-sample forward and backward pass on the GPU.

    Each argument has its first two dimensions swapped and is then copied to
    the GPU before being wrapped as a gradient-requiring Variable. The
    backward pass uses a random gradient of the output's size. Which backend
    runs depends on ``cudnn.enabled`` as set by the caller. Returns nothing.
    """
    gpu_input = Variable(input_data.transpose(0, 1).cuda(), requires_grad=True)
    gpu_grid = Variable(grid_data.transpose(0, 1).cuda(), requires_grad=True)
    sampled = F.grid_sample(gpu_input, gpu_grid)
    random_grads = sampled.data.new(sampled.size()).normal_()
    sampled.backward(random_grads)
    # Drop the references explicitly, in the same order as before.
    del gpu_input, gpu_grid, sampled
if __name__ == "__main__":
    # Each scenario is (title, benchmark_shape arguments), where the
    # arguments are (N, C, IH, IW, H, W, nrand, nrep).
    scenarios = [
        ("Testing small sizes", (10, 5, 20, 20, 15, 15, 5, 5)),
        ("Testing small sizes, big N", (500, 5, 20, 20, 15, 15, 5, 5)),
        ("Testing large sizes", (50, 10, 100, 100, 100, 100, 5, 5)),
        ("Testing large sizes, small C", (50, 5, 100, 100, 100, 100, 5, 5)),
        ("Testing large N", (500, 10, 50, 50, 50, 50, 5, 5)),
        ("Testing large C", (50, 100, 50, 50, 50, 50, 5, 5)),
        ("Testing large input", (50, 10, 500, 500, 80, 80, 5, 5)),
        ("Testing large output", (50, 10, 80, 80, 500, 500, 5, 5)),
    ]
    for title, shape_args in scenarios:
        print(title)
        benchmark_shape(*shape_args)
        # Blank line between scenarios.
        print("")
    # check_shapes(100, 8, 100, 100, 60, 60)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment