Skip to content

Instantly share code, notes, and snippets.

View soumith's full-sized avatar

Soumith Chintala soumith

View GitHub Profile
@soumith
soumith / gist:1f7645f14738d39be2b5
Created October 11, 2014 20:52
CuDNN SpatialMaxPooling bug
require 'cudnn'
require 'cunn'
-- Shared state for the cudnn tests: the table that collects the test
-- functions, a table for timing results, the loop count, and three
-- numeric tolerances (presumably for forward, backward and Jacobian
-- comparisons -- confirm against the tests that read them).
local cudnntest, times = {}, {}
local nloop = 1
local precision_forward, precision_backward, precision_jac = 1e-4, 1e-2, 1e-3
do
require 'torch'
local ffi = require 'ffi'
local argcheck = require 'argcheck'
local dataset = torch.class('torch.dataset')
local initcheck = argcheck{
pack=true,
{name="paths", type="table", help="Multiple paths of directories with images"},
numStrings = 10 -- for example, let's use 10, but this number can be anything up to memory limits
maxStringLength = 100 -- this has to be predetermined
-- allocate a numStrings x maxStringLength CharTensor (one row per string)
bigStringTensor = torch.CharTensor(numStrings, maxStringLength)
bst_data=torch.data(bigStringTensor) -- raw C pointer using torchffi
-- load some strings into the stringTensor
str='hello world'
Torch 7.0 Copyright (C) 2001-2011 Idiap, NEC Labs, NYU
<mnist> loading only 1000 examples
<mnist> reading 1000 examples with 784+1 dimensions...
<mnist> done
<mnist> loading only 1000 examples
<mnist> reading 1000 examples with 784+1 dimensions...
<mnist> done
1/10 - Recon err: 39.1
2/10 - Recon err: 25.0
3/10 - Recon err: 20.9
require 'paths'
local current_dir = 'torch7'
checkFile = function( current_dir )
local list = paths.dir( current_dir )
table.sort(list, function (a,b) return a<b end)
for i = 3, #list do
list[i] = paths.concat(current_dir, list[i])
if paths.filep( list[i] )then
print( ' ==> found file '.. list[i] )
# Tool names: C compiler, archiver and CUDA compiler.
CC := clang
AR := ar
NVCC := nvcc
# Object file for the CUDA convnet code (presumably built by a rule outside this fragment).
CUDA_OBJS := cuda/cwc_convnet.o
# Feature macros shared by CFLAGS and NVFLAGS below.
DEFINE_MACROS := -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_CBLAS -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_CUDA
# C compiler flags: SSE2 code generation plus the feature macros.
CFLAGS := -msse2 $(DEFINE_MACROS)
# nvcc flags: fast-math, sm_30 target architecture, plus the same feature macros.
NVFLAGS := --use_fast_math -arch=sm_30 $(DEFINE_MACROS)
# Libraries to link against; the trailing -L adds the CUDA library directory to the search path.
LDFLAGS := -lm -lpng -ljpeg -lfftw3 -lfftw3f -lpthread -lblas -lavcodec -lavformat -lswscale -lcuda -lcudart -lcublas -L"/usr/local/cuda/lib64"
require 'torch'
require 'nn'
require 'image'
require 'gfx.js'
-- Create a SpatialConvolution module with arguments (5, 10, 16, 16) and
-- display its weight tensor as 5*10 = 50 separate 16x16 images, passing
-- zoom=5.0 to gfx.image. `mod` and `filters` are left as globals.
local n_in, n_out, k_w, k_h = 5, 10, 16, 16
mod = nn.SpatialConvolution(n_in, n_out, k_w, k_h)
filters = mod.weight
local tiles = filters:view(n_in * n_out, k_h, k_w)
gfx.image(tiles, {zoom = 5.0})
dataset=torch.Tensor{
{742313794,6385273,1362403386,80,169085576,0,1698277100,451,0},
{742313860,6385273,1362403386,80,169085576,0,1698300945,452,0},
{742338872,6385273,167840643,137,169085576,0,1718221933,480,0},
{742338872,6385273,167926801,53,169085576,0,1718222057,471,0},
{742338872,6385273,167846975,8014,169085576,0,1718222380,487,0},
{742338872,641958438357784123396879472047392017641614042483,167846975,8014,169085576,0,1718222453,517,0},
{742338934,6385273,167926801,53,169085576,0,1718273961,472,0},
{742338934,641958438357784123396879472047392017641614042483,167846975,8014,169085576,0,1718275304,519,0},
{742338934,6385273,167840643,137,169085576,0,1718275541,483,0},
require 'torch'
require 'nn'
require 'sys'
torch.setdefaulttensortype('torch.FloatTensor')
-- Size settings for the script, kept as globals. The names suggest node
-- counts per layer plus a batch count -- confirm against the code that
-- consumes them.
numInputNodes, numBatches = 30, 64
numHidden1, numHidden2 = 64, 128
numOutputNodes = 1
require 'torch'
require 'nn'
torch.setdefaulttensortype('torch.FloatTensor')
-- Global size settings:
--   frameSize  = 4 : each input frame has 4 numbers
--   hiddenSize = 5 : each hidden layer has 5 units
--   outputSize = 1 : each output layer has 1 output
frameSize, hiddenSize, outputSize = 4, 5, 1
-------------- Model -------------------------
mlp = nn.Sequential()