soumith · December 19, 2015 20:09
diff --git a/gistfile1.lua b/gistfile1.lua
 #!/usr/bin/env torch
 require 'nn'
 require 'image'
 require 'xlua'
 require 'pl'


 -- Command-line options, parsed by lapp (presumably provided by the earlier
 -- require 'pl') from the usage text below.
 opt = lapp([[
 		 -t,--threads            (default 8)           number of threads
   -p,--type               (default float)       float or cuda
   -i,--devid              (default 1)           device ID (if using CUDA)
 ]])

 -- Process-wide Torch settings: float tensors by default, a fixed RNG seed,
 -- and the thread count requested on the command line.
 torch.setdefaulttensortype('torch.FloatTensor')
 torch.manualSeed(1)
 torch.setnumthreads(opt.threads)

 -- Profiler object; the benchmark further down times the forward pass with it.
 p = xlua.Profiler()

 -- Optional CUDA setup, selected with `--type cuda`.
 local use_cuda = (opt.type == 'cuda')
 if use_cuda then
   print('==> switching to CUDA')
   require('cunn')
   cutorch.setDevice(opt.devid)
   local active_device = cutorch.getDevice()
   print('==> using GPU #' .. active_device)

   -- Alias: nn.SpatialConvolutionMM now refers to nn.SpatialConvolution.
   -- NOTE(review): nothing visible in this script reads the alias afterwards.
   nn.SpatialConvolutionMM = nn.SpatialConvolution
 end

 -- Benchmark geometry (kept as globals so they can be inspected afterwards):
 -- input height/width, input/output feature planes, kernel height/width and
 -- batch size. The Gops/s report at the end of the script reads all of them.
 iH = 192
 iW = 192
 fin = 16
 fout = 32
 kH = 10
 kW = 10
 batchSize = 128

 -- input, allocated as batchSize x fin x iH x iW. The tensor is only
 -- allocated here; this script never fills it with data.
 lena1 = torch.Tensor(batchSize,fin,iH,iW)

 -- model to test. nn.SpatialConvolution takes the kernel size as (kW, kH);
 -- passing kH explicitly keeps the layer consistent with the op-count formula
 -- if the kernel is ever made non-square (previously kW was passed twice).
 model = nn.SpatialConvolution(fin, fout, kW, kH)

 -- CUDA run: replace both the model and the input with GPU versions. Note this
 -- builds a new SpatialConvolutionCUDA module rather than copying the CPU one,
 -- and the input dimensions are reordered to fin x iH x iW x batchSize.
 -- NOTE(review): assumes SpatialConvolutionCUDA also takes (kW, kH) and
 -- expects the batch-last layout - confirm against the installed cunn.
 if opt.type == 'cuda' then
   model = nn.SpatialConvolutionCUDA(fin, fout, kW, kH):cuda()
   lena1 = torch.CudaTensor(fin,iH,iW,batchSize)
 end

 -- Time one forward pass under a single profiler event.
 local event = 'spatialconv'
 p:start(event)
 lena2 = model:forward(lena1)
 if opt.type == 'cuda' then
   -- Synchronize before taking the lap (presumably so that pending GPU work
   -- is included in the measurement).
   cutorch.synchronize()
 end
 p:lap(event)

 p:printAll({})

 -- Operation count: one MUL and one ACC (hence the factor 2) per kernel tap,
 -- per input/output plane pair, per output pixel, per batch element.
 local outH = (iH - kH) + 1
 local outW = (iW - kW) + 1
 local ops = batchSize * fin * fout * kH * kW * outH * outW * 2

 -- NOTE(review): the denominator is the profiler's cpu figure; confirm that
 -- this (rather than wall-clock time) is intended for threaded/CUDA runs.
 print('Gops/s:', ops / p:cpu(event) / 1e9)
	#!/usr/bin/env torch
	require 'nn'
	require 'image'
	require 'xlua'
	require 'pl'


	-- Command-line options, parsed by lapp (presumably provided by the earlier
	-- require 'pl') from the usage text below.
	opt = lapp([[
	-t,--threads (default 8) number of threads
	-p,--type (default float) float or cuda
	-i,--devid (default 1) device ID (if using CUDA)
	]])

	-- Process-wide Torch settings: float tensors by default, a fixed RNG seed,
	-- and the thread count requested on the command line.
	torch.setdefaulttensortype('torch.FloatTensor')
	torch.manualSeed(1)
	torch.setnumthreads(opt.threads)

	-- Profiler object; the benchmark further down times the forward pass with it.
	p = xlua.Profiler()

	-- Optional CUDA setup, selected with `--type cuda`.
	local use_cuda = (opt.type == 'cuda')
	if use_cuda then
	  print('==> switching to CUDA')
	  require('cunn')
	  cutorch.setDevice(opt.devid)
	  local active_device = cutorch.getDevice()
	  print('==> using GPU #' .. active_device)

	  -- Alias: nn.SpatialConvolutionMM now refers to nn.SpatialConvolution.
	  -- NOTE(review): nothing visible in this script reads the alias afterwards.
	  nn.SpatialConvolutionMM = nn.SpatialConvolution
	end

	-- Benchmark geometry (kept as globals so they can be inspected afterwards):
	-- input height/width, input/output feature planes, kernel height/width and
	-- batch size. The Gops/s report at the end of the script reads all of them.
	iH = 192
	iW = 192
	fin = 16
	fout = 32
	kH = 10
	kW = 10
	batchSize = 128

	-- input, allocated as batchSize x fin x iH x iW. The tensor is only
	-- allocated here; this script never fills it with data.
	lena1 = torch.Tensor(batchSize,fin,iH,iW)

	-- model to test. nn.SpatialConvolution takes the kernel size as (kW, kH);
	-- passing kH explicitly keeps the layer consistent with the op-count formula
	-- if the kernel is ever made non-square (previously kW was passed twice).
	model = nn.SpatialConvolution(fin, fout, kW, kH)

	-- CUDA run: replace both the model and the input with GPU versions. Note this
	-- builds a new SpatialConvolutionCUDA module rather than copying the CPU one,
	-- and the input dimensions are reordered to fin x iH x iW x batchSize.
	-- NOTE(review): assumes SpatialConvolutionCUDA also takes (kW, kH) and
	-- expects the batch-last layout - confirm against the installed cunn.
	if opt.type == 'cuda' then
	  model = nn.SpatialConvolutionCUDA(fin, fout, kW, kH):cuda()
	  lena1 = torch.CudaTensor(fin,iH,iW,batchSize)
	end

	-- test speed: time one forward pass under the 'spatialconv' profiler event.
	p:start('spatialconv')
	lena2 = model:forward(lena1)
	-- On CUDA, synchronize before taking the lap (presumably so that pending
	-- GPU work is included in the measurement).
	if opt.type == 'cuda' then cutorch.synchronize() end
	p:lap('spatialconv')

	p:printAll{}

	-- Operation count: one MUL and one ACC (hence the factor 2) per kernel tap,
	-- per input/output plane pair, per output pixel, per batch element.
	-- The multiplication operators had been stripped from this expression,
	-- which turned it into a call on an undefined global; they are restored here.
	local outH = (iH - kH) + 1
	local outW = (iW - kW) + 1
	local ops = batchSize * fin * fout * kH * kW * outH * outW * 2

	-- NOTE(review): the denominator is the profiler's cpu figure; confirm that
	-- this (rather than wall-clock time) is intended for threaded/CUDA runs.
	print('Gops/s:', ops / p:cpu('spatialconv') / 1e9)