@sherjilozair
Created December 14, 2014 10:51
import numpy
import theano
from theano import tensor as T
import operator as op
# Activation functions
sigmoid = T.nnet.sigmoid
tanh = T.tanh
linear = lambda x: x
relu = lambda x: T.maximum(x, 0)
softmax = T.nnet.softmax
class Layer:
    """A single fully-connected layer: act(x . W + b)."""
    def __init__(self, n_in, n_out, act):
        self.act = act
        self.W = self.init_weight(n_in, n_out, act)
        self.b = self.init_bias(n_out)
        self.params = [self.W, self.b]

    def init_weight(self, n_in, n_out, act):
        # Glorot-style uniform initialization in [-sqrt(6/(n_in+n_out)), +sqrt(6/(n_in+n_out))].
        a = numpy.sqrt(6. / (n_in + n_out))
        return theano.shared(numpy.random.uniform(size=(n_in, n_out), low=-a, high=a))

    def init_bias(self, n_out):
        return theano.shared(numpy.zeros(n_out))

    def __call__(self, inp):
        return self.act(T.dot(inp, self.W) + self.b)
class MLP:
    """A stack of Layers; hls lists the hidden-layer sizes, acts the activations."""
    def __init__(self, n_in, n_out, hls, acts):
        self.layers = [Layer(*args) for args in zip([n_in] + hls, hls + [n_out], acts)]
        self.params = reduce(op.add, map(lambda l: l.params, self.layers))

    def __call__(self, inp):
        # Thread the input through each layer in turn.
        return reduce(lambda x, fn: fn(x), self.layers, inp)
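# Illustrative usage (not part of the original gist): a one-hidden-layer
# classifier with 100 relu units and a softmax output would be built as
#   clf = MLP(784, 10, [100], [relu, softmax])
#   probs = clf(T.matrix('x'))   # symbolic (batch, 10) matrix of probabilities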
class RNN:
    def __init__(self, num_words, dim_embeddings, dimH):
        # Word embeddings, uniformly initialized like the layer weights.
        a = numpy.sqrt(6. / (num_words + dim_embeddings))
        init_embeddings = numpy.random.uniform(size=(num_words, dim_embeddings), low=-a, high=a)
        self.embeddings = theano.shared(name='embeddings', value=init_embeddings)
        self.H0 = theano.shared(name='h0', value=numpy.zeros(dimH))

        # idxs is an (n_steps, 1) column of word indices; X is the matching
        # (n_steps, dim_embeddings) sequence of embedding vectors.
        self.idxs = T.icol()
        self.X = self.embeddings[self.idxs].reshape((self.idxs.shape[0], dim_embeddings))

        # Recurrence: h_t = Z2H(x2Z(x_t) + H2Z(h_{t-1})), s_t = H2P(h_t).
        self.H2Z = MLP(dimH, dimH, [], [linear])
        self.x2Z = MLP(dim_embeddings, dimH, [], [linear])
        self.Z2H = MLP(dimH, dimH, [50], [relu, linear])
        self.H2P = MLP(dimH, num_words, [50], [relu, softmax])

        def fn(x, h):
            # One step: combine the current input with the previous hidden state.
            h = self.Z2H(self.x2Z(x) + self.H2Z(h))
            s = self.H2P(h)
            return [h, s]

        [self.H, self.S], _ = theano.scan(fn=fn, sequences=self.X,
                                          outputs_info=[self.H0, None],
                                          n_steps=self.X.shape[0])

        # softmax on a vector returns a (1, num_words) row, so S has shape
        # (n_steps, 1, num_words); idxs is flattened to a vector of int targets.
        self.cost = T.mean(T.nnet.categorical_crossentropy(self.S[:, 0, :], self.idxs.flatten()))
        #self.cost = T.mean(T.choose(self.idxs, -T.log(self.S[:,0,:])))
        self.params = [self.embeddings] + self.H2Z.params + self.x2Z.params + self.Z2H.params + self.H2P.params
        self.grads = T.grad(self.cost, self.params)

        # Plain SGD updates.
        self.lr = T.scalar()
        self.updates = [(param, param - self.lr * grad)
                        for param, grad in zip(self.params, self.grads)]
        self.train_fn = theano.function([self.idxs, self.lr], [self.cost],
                                        updates=self.updates, allow_input_downcast=True)
        self.f = theano.function([self.idxs], [self.H, self.S, self.cost],
                                 allow_input_downcast=True)
# Tiny smoke test: overfit a two-token sequence.
rnn = RNN(10, 10, 20)
idx = numpy.array([[1, 2]]).T
for i in xrange(1000):
    rnn.train_fn(idx, 0.1)
H, S, C = rnn.f(idx)
print S[0, 0, :]  # predicted distribution over the vocabulary at step 0
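# A minimal sampling sketch (an assumption, not part of the original gist):
# draw tokens from the model's per-step output distribution. It re-runs the
# whole prefix through rnn.f at every step, which is O(n^2) in the sequence
# length but keeps the example short.
def sample(rnn, seed, length=10):
    seq = list(seed)
    for _ in xrange(length):
        _, S, _ = rnn.f(numpy.array([seq]).T)
        p = S[-1, 0, :]           # output distribution at the final step
        p = p / p.sum()           # renormalize against floating-point drift
        seq.append(int(numpy.random.choice(len(p), p=p)))
    return seq

# e.g. sample(rnn, [1], length=5) extends the seed token with five sampled tokens.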