culurciello · November 8, 2016 14:29
diff --git a/gistfile1.lua b/gistfile1.lua
 #!/usr/bin/env torch
 require 'nn'
 require 'image'
 require 'xlua'
 require 'pl'


 -- Command-line options. The usage text doubles as the option specification
 -- handed to lapp (brought into scope by require 'pl').
 local usage = [[
   -t,--threads            (default 8)           number of threads
   -p,--type               (default float)       float or cuda
   -i,--devid              (default 1)           device ID (if using CUDA)
 ]]
 opt = lapp(usage)

 -- Global torch configuration: float tensors by default, a fixed RNG seed,
 -- and the requested number of threads.
 torch.setdefaulttensortype('torch.FloatTensor')
 torch.manualSeed(1)
 torch.setnumthreads(opt.threads)

 -- Profiler used further down to time the forward pass.
 p = xlua.Profiler()


 -- Optional GPU setup, performed only when '--type cuda' was requested.
 local use_cuda = (opt.type == 'cuda')
 if use_cuda then
   print('==> switching to CUDA')
   require('cunn')

   -- Select the requested device, then report the one that is active.
   cutorch.setDevice(opt.devid)
   local active_device = cutorch.getDevice()
   print('==> using GPU #' .. active_device)

   -- Make the MM class name resolve to the plain convolution class.
   nn.SpatialConvolutionMM = nn.SpatialConvolution
 end

 -- model to test (built first so that its weight initialisation draws the
 -- same random numbers as before the input became random):
 --model = nn.SpatialConvolution(1, 8, 9, 9)
 model = nn.SpatialConvolution(1, 16, 10, 10) -- 16 filters of 10x10 on a 512x512 image

 -- input: a single 512x512 plane of uniform random values.
 -- A bare torch.Tensor(1,512,512) does not initialise its storage, so the
 -- benchmark would run on whatever happened to be in memory (possibly NaNs),
 -- which makes the measurement hard to reproduce.
 lena1 = torch.rand(1,512,512)
 -- lena1 = image.lena()[{1}]:reshape(1,512,512)

 -- copy to GPU if desired:
 if opt.type == 'cuda' then
   model:cuda()
   -- Transfer the same data instead of allocating a fresh, uninitialised
   -- CudaTensor.
   lena1 = lena1:cuda()
 end

 -- test speed: time a single forward pass of the model on the input.
 p:start('spatialconv')
 lena2 = model:forward(lena1)
 -- On GPU, wait for the device before stopping the timer.
 if opt.type == 'cuda' then cutorch.synchronize() end
 p:lap('spatialconv')

 p:printAll{}


 -- Operation count: each output element takes nInputPlane*kH*kW
 -- multiply-accumulates, counted as 2 operations (MUL, ACC).
 -- The sizes are read from the model and the actual output rather than
 -- hard-coded: the previous formula used (512-9)+1 = 504 per side, but the
 -- 10x10 kernel yields (512-10)+1 = 503, so the figure was overstated.
 -- NOTE(review): p:cpu is the profiler's CPU-time figure; with several threads
 -- or on GPU a wall-clock figure may be the better denominator -- confirm.
 local ops = lena2:nElement() * model.nInputPlane * model.kH * model.kW * 2
 print('Gops/s:', ops / p:cpu('spatialconv') / 1e9)
	#!/usr/bin/env torch
	require 'nn'
	require 'image'
	require 'xlua'
	require 'pl'


	-- Command-line options. The usage text doubles as the option specification
	-- handed to lapp (brought into scope by require 'pl').
	local usage = [[
	-t,--threads (default 8) number of threads
	-p,--type (default float) float or cuda
	-i,--devid (default 1) device ID (if using CUDA)
	]]
	opt = lapp(usage)

	-- Global torch configuration: float tensors by default, a fixed RNG seed,
	-- and the requested number of threads.
	torch.setdefaulttensortype('torch.FloatTensor')
	torch.manualSeed(1)
	torch.setnumthreads(opt.threads)

	-- Profiler used further down to time the forward pass.
	p = xlua.Profiler()


	-- Optional GPU setup, performed only when '--type cuda' was requested.
	local use_cuda = (opt.type == 'cuda')
	if use_cuda then
	  print('==> switching to CUDA')
	  require('cunn')

	  -- Select the requested device, then report the one that is active.
	  cutorch.setDevice(opt.devid)
	  local active_device = cutorch.getDevice()
	  print('==> using GPU #' .. active_device)

	  -- Make the MM class name resolve to the plain convolution class.
	  nn.SpatialConvolutionMM = nn.SpatialConvolution
	end

	-- model to test (built first so that its weight initialisation draws the
	-- same random numbers as before the input became random):
	--model = nn.SpatialConvolution(1, 8, 9, 9)
	model = nn.SpatialConvolution(1, 16, 10, 10) -- 16 filters of 10x10 on a 512x512 image

	-- input: a single 512x512 plane of uniform random values.
	-- A bare torch.Tensor(1,512,512) does not initialise its storage, so the
	-- benchmark would run on whatever happened to be in memory (possibly NaNs),
	-- which makes the measurement hard to reproduce.
	lena1 = torch.rand(1,512,512)
	-- lena1 = image.lena()[{1}]:reshape(1,512,512)

	-- copy to GPU if desired:
	if opt.type == 'cuda' then
	  model:cuda()
	  -- Transfer the same data instead of allocating a fresh, uninitialised
	  -- CudaTensor.
	  lena1 = lena1:cuda()
	end

	-- test speed: time a single forward pass of the model on the input.
	p:start('spatialconv')
	lena2 = model:forward(lena1)
	-- On GPU, wait for the device before stopping the timer.
	if opt.type == 'cuda' then cutorch.synchronize() end
	p:lap('spatialconv')

	p:printAll{}


	-- Operation count: each output element takes nInputPlane*kH*kW
	-- multiply-accumulates, counted as 2 operations (MUL, ACC).
	-- The previous expression had lost its multiplication signs and was not
	-- valid Lua; it also assumed a 9x9 kernel (504 outputs per side) although
	-- the 10x10 kernel yields (512-10)+1 = 503. The sizes are now read from the
	-- model and the actual output instead of being hard-coded.
	-- NOTE(review): p:cpu is the profiler's CPU-time figure; with several threads
	-- or on GPU a wall-clock figure may be the better denominator -- confirm.
	local ops = lena2:nElement() * model.nInputPlane * model.kH * model.kW * 2
	print('Gops/s:', ops / p:cpu('spatialconv') / 1e9)
No results found