Soumith Chintala (soumith)
Selected public gists
--[[
This file implements Batch Normalization as described in the paper:
"Batch Normalization: Accelerating Deep Network Training
by Reducing Internal Covariate Shift"
by Sergey Ioffe, Christian Szegedy
This implementation is useful for inputs coming from convolution layers.
For non-convolutional layers, see BatchNormalization.lua
The operation implemented is:
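The comment is cut off before the formula, so here is a minimal sketch (not taken from the gist) of the standard operation from the Ioffe & Szegedy paper: for convolutional inputs, each feature map is normalized with a mean and variance computed over the batch and spatial dimensions, then scaled and shifted by learned per-channel parameters.
require 'torch'
local x = torch.randn(16, 4, 8, 8)              -- batch x channels x height x width
local eps = 1e-5
local y = x:clone()
for c = 1, x:size(2) do
   local xc = x:select(2, c)                    -- every value of feature map c
   y:select(2, c):copy((xc - xc:mean()) / math.sqrt(xc:var() + eps))
end
-- nn.SpatialBatchNormalization additionally applies a learned per-channel
-- gamma (scale) and beta (shift) when its affine flag is enabled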
@soumith
soumith / gist:4766a592cb3645035ef8
Created November 6, 2015 17:25
alexnet-owt-bn 28 epoch convergence
2015-03-28 22:57:04.475 Epoch: [1][10000/10000] Time 0.304 DataTime 0.005 Err 3.9184
2015-03-28 22:57:04.476 ==> Validation epoch # 1
2015-03-28 22:58:07.329 json_stats: {"learningRate":0.25,"batchSize":128,"train_loss":4.7269560290813,"manualSeed":1,"msra_mul":0,"decay":0.5,"backend":"cudnn","epochSize":10000,"nDonkeys":16,"train_time":3231.9889090061,"test_accuracy":14.646,"weightDecay":0.0005,"epoch":1,"nEpochs":30,"model":"alexnetowtbn","momentum":0.9,"test_loss":4.6024402600098,"GPU":1,"retrain":"","train_accuracy":13.362265625,"test_time":62.852720022202,"best_accuracy":0,"bestAccuracy":0,"nGPU":4}
--
2015-03-28 23:53:36.332 Epoch: [2][10000/10000] Time 0.286 DataTime 0.006 Err 3.7128
2015-03-28 23:53:36.332 ==> Validation epoch # 2
2015-03-28 23:54:37.711 json_stats: {"learningRate":0.25,"batchSize":128,"train_loss":3.6877586714745,"manualSeed":1,"msra_mul":0,"decay":0.5,"backend":"cudnn","epochSize":10000,"nDonkeys":16,"train_time":3214.1851511002,"test_accuracy":16.416,"weightDecay":0.0005,"ep
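Each epoch ends with a json_stats line like the ones above; a small sketch of pulling metrics back out of such a log (assumption: the lua-cjson package is installed, it is not part of the gist):
local cjson = require 'cjson'
local line = '2015-03-28 22:58:07.329 json_stats: {"epoch":1,"test_accuracy":14.646,"train_loss":4.7269560290813}'
local payload = line:match('json_stats:%s*(%b{})')  -- grab the balanced {...} blob
local stats = cjson.decode(payload)
print(stats.epoch, stats.test_accuracy)             -- 1   14.646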
model_G = nn.Sequential()
model_G:add(nn.JoinTable(2, 2))
model_G:add(cudnn.SpatialConvolutionUpsample(3+1, 64, 7, 7, 1, 1)):add(cudnn.ReLU(true))
model_G:add(nn.SpatialBatchNormalization(64, nil, nil, false))
model_G:add(cudnn.SpatialConvolutionUpsample(64, 368, 7, 7, 1, 4)):add(cudnn.ReLU(true))
model_G:add(nn.SpatialBatchNormalization(368, nil, nil, false))
model_G:add(nn.SpatialDropout(0.5))
model_G:add(cudnn.SpatialConvolutionUpsample(368, 128, 7, 7, 1, 4)):add(cudnn.ReLU(true))
model_G:add(nn.SpatialBatchNormalization(128, nil, nil, false))
model_G:add(nn.FeatureLPPooling(2,2,2,true))
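A brief sketch (shapes are assumptions, chosen for illustration) of how a conditional generator like model_G is fed: nn.JoinTable(2, 2) concatenates a 3-channel coarse image with a 1-channel noise map along the channel dimension, which is why the first convolution above takes 3+1 input planes.
require 'nn'
local batch, h, w = 4, 16, 16
local coarse = torch.randn(batch, 3, h, w)   -- conditioning image
local noise  = torch.randn(batch, 1, h, w)   -- per-pixel noise channel
local joined = nn.JoinTable(2, 2):forward({coarse, noise})
print(joined:size())                         -- 4 x 4 x 16 x 16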
@soumith
soumith / gist:e3f722173ea16c1ea0d9
Created June 22, 2015 21:34
CIFAR-10 eyescream
----------------------------------------------------------------------
-- CIFAR 8x8
opt.scale = 8
opt.geometry = {3, opt.scale, opt.scale}
local input_sz = opt.geometry[1] * opt.geometry[2] * opt.geometry[3]
local numhid = 600
model_D = nn.Sequential()
model_D:add(nn.Reshape(input_sz))
model_D:add(nn.Linear(input_sz, numhid))
model_D:add(nn.ReLU())
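A usage sketch (not part of the gist) for the discriminator head above: nn.Reshape flattens each 3x8x8 patch into input_sz = 3*8*8 = 192 values before the first fully connected layer.
require 'nn'
local model_D = nn.Sequential()
model_D:add(nn.Reshape(192))
model_D:add(nn.Linear(192, 600))
model_D:add(nn.ReLU())
local x = torch.randn(32, 3, 8, 8)   -- a batch of 32 colour patches at 8x8
print(model_D:forward(x):size())     -- 32 x 600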
@soumith
soumith / gist:f2605f9f0a632e02b4fb
Created March 25, 2015 00:00
lstmrnn-digitsbox
~/code/lstm$ th main.lua
Loading ./data/ptb.train.txt, size of data = 929589
Loading ./data/ptb.valid.txt, size of data = 73760
Loading ./data/ptb.test.txt, size of data = 82430
Using 1-th gpu
Network parameters:
{
layers : 2
lr : 1
max_max_epoch : 13
@soumith
soumith / gist:d364081b2fa70f8cc307
Created February 25, 2015 21:50
overfeat-small full log
overfeat schedule
=================
LR=0.01,epochNumber=1,weightDecay=5e-4
LR=0.005,epochNumber=20,weightDecay=5e-4
LR=0.001,epochNumber=33,weightDecay=0
LR=0.0005,epochNumber=48,weightDecay=0
Run till epoch 76
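The same schedule, written out as a lookup (a sketch, not code from the gist): given an epoch number, return the learning rate and weight decay in force.
local function overfeatSchedule(epoch)
   if epoch >= 48 then return 0.0005, 0        -- epochs 48 to 76
   elseif epoch >= 33 then return 0.001, 0     -- epochs 33 to 47
   elseif epoch >= 20 then return 0.005, 5e-4  -- epochs 20 to 32
   else return 0.01, 5e-4                      -- epochs 1 to 19
   end
end
print(overfeatSchedule(1))    -- 0.01    0.0005
print(overfeatSchedule(76))   -- 0.0005  0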
Epoch logs
=============
==> processing options
options are:
table: 0x41910ed8
==> loading dataset
==> preprocessing data
==> Preprocessing total time: 0.28143811225891
==> construct ReLU model
==> define loss
model add to cuda
==> defining some tools
@soumith
soumith / gist:4a1339a078b4c0f7fd2b
Created December 25, 2014 04:03
cudnn sigmoid bug reproduction
cudnnActivationBackward bug (sigmoid)
This file contains printed states of the following:
src (output)
srcdiff (gradOutput)
dest (input)
destdiff (gradInput)
Look at destdiff and notice the numerous NaNs produced; a minimal reproduction sketch follows this preview.
--------------------------------------------------------------
src (output)
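A sketch of the kind of check the description refers to (this is not the gist's harness, and the tensor shapes and values here are assumptions; random inputs may not trigger the NaNs the original data did): run cudnn.Sigmoid forward and backward and count NaNs in the gradInput (destdiff).
require 'cudnn'
local m = cudnn.Sigmoid():cuda()
local input = torch.randn(128, 96, 27, 27):cuda()   -- dest
local output = m:forward(input)                     -- src
local gradOutput = output:clone():normal()          -- srcdiff
local gradInput = m:backward(input, gradOutput)     -- destdiff
print(gradInput:ne(gradInput):sum() .. ' NaNs in gradInput')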
@soumith
soumith / benchmarks prelim
Created December 18, 2014 02:44
benchmarks prelim
All timings are averaged over 10 runs and reported in milliseconds on a Tesla K40m with ECC off.
Time combines forward + backwardInputs + backwardGradients
###############################################################################
ModelType: AlexNet Kernels: cudnn Input shape: 128x3x224x224
cudnn :updateOutput(): 147.32
cudnn :updateGradInput(): 167.79
cudnn :accGradParameters(): 153.96
cudnn :TOTAL: 469.07
ModelType: AlexNet Kernels: nn Input shape: 128x3x224x224
nn :updateOutput(): 201.53
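A sketch (not the gist's actual harness) of how per-call timings like these are typically collected: synchronize the GPU, then average updateOutput, updateGradInput and accGradParameters over 10 runs; the layer used below is a stand-in, not the full AlexNet model.
require 'cudnn'
local runs = 10
local net = cudnn.SpatialConvolution(3, 96, 11, 11, 4, 4):cuda()
local input = torch.randn(128, 3, 224, 224):cuda()
net:forward(input)                                  -- warm-up
cutorch.synchronize()
local timer = torch.Timer()
for i = 1, runs do
   local output = net:updateOutput(input)
   local gradOutput = output:clone():normal()
   net:updateGradInput(input, gradOutput)
   net:accGradParameters(input, gradOutput)
end
cutorch.synchronize()
print(('fwd + bwd: %.2f ms per run'):format(timer:time().real * 1000 / runs))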
@soumith
soumith / gist:0f95facad88cbea68c6d
Created November 20, 2014 04:24
linear with no bias
local Linear, parent = torch.class('nn.NoBiasLinear', 'nn.Linear')
function Linear:__init(inputSize, outputSize)
parent.__init(self, inputSize, outputSize)
self.bias:fill(0)
end
function Linear:accGradParameters(input, gradOutput, scale)
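The preview stops at the accGradParameters signature; a sketch (an assumption, not the rest of the gist) of how a bias-free Linear typically finishes it: accumulate gradients for the weights only and never touch gradBias, so the bias zeroed in __init stays at zero.
function Linear:accGradParameters(input, gradOutput, scale)
   scale = scale or 1
   if input:dim() == 1 then
      self.gradWeight:addr(scale, gradOutput, input)
   elseif input:dim() == 2 then
      self.gradWeight:addmm(scale, gradOutput:t(), input)
   end
   -- intentionally no self.gradBias update
end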