@sherjilozair
Created December 14, 2014 10:51
import numpy
import theano
from theano import tensor as T
import operator as op
# Activation functions
sigmoid = T.nnet.sigmoid
tanh = T.tanh
linear = lambda x: x
relu = lambda x: T.maximum(x, 0)
softmax = T.nnet.softmax
class Layer:
    """A single fully-connected layer: act(x . W + b)."""
    def __init__(self, n_in, n_out, act):
        self.act = act
        self.W = self.init_weight(n_in, n_out, act)
        self.b = self.init_bias(n_out)
        self.params = [self.W, self.b]

    def init_weight(self, n_in, n_out, act):
        # Glorot-style uniform initialization in [-sqrt(6/(n_in+n_out)), +sqrt(6/(n_in+n_out))].
        a = numpy.sqrt(6. / (n_in + n_out))
        return theano.shared(numpy.random.uniform(size=(n_in, n_out), low=-a, high=a))

    def init_bias(self, n_out):
        return theano.shared(numpy.zeros(n_out))

    def __call__(self, inp):
        return self.act(T.dot(inp, self.W) + self.b)
class MLP:
    """A stack of Layers; hls lists the hidden-layer sizes, acts the activations."""
    def __init__(self, n_in, n_out, hls, acts):
        self.layers = [Layer(*args) for args in zip([n_in] + hls, hls + [n_out], acts)]
        self.params = reduce(op.add, map(lambda l: l.params, self.layers))

    def __call__(self, inp):
        # Thread the input through each layer in turn.
        return reduce(lambda x, fn: fn(x), self.layers, inp)
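# Illustrative usage (not part of the original gist): a one-hidden-layer
# classifier with 100 relu units and a softmax output would be built as
#   clf = MLP(784, 10, [100], [relu, softmax])
#   probs = clf(T.matrix('x'))   # symbolic (batch, 10) matrix of probabilities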
class RNN:
    def __init__(self, num_words, dim_embeddings, dimH):
        # Word embeddings, uniformly initialized like the layer weights.
        a = numpy.sqrt(6. / (num_words + dim_embeddings))
        init_embeddings = numpy.random.uniform(size=(num_words, dim_embeddings), low=-a, high=a)
        self.embeddings = theano.shared(name='embeddings', value=init_embeddings)
        self.H0 = theano.shared(name='h0', value=numpy.zeros(dimH))

        # idxs is an (n_steps, 1) column of word indices; X is the matching
        # (n_steps, dim_embeddings) sequence of embedding vectors.
        self.idxs = T.icol()
        self.X = self.embeddings[self.idxs].reshape((self.idxs.shape[0], dim_embeddings))

        # Recurrence: h_t = Z2H(x2Z(x_t) + H2Z(h_{t-1})), s_t = H2P(h_t).
        self.H2Z = MLP(dimH, dimH, [], [linear])
        self.x2Z = MLP(dim_embeddings, dimH, [], [linear])
        self.Z2H = MLP(dimH, dimH, [50], [relu, linear])
        self.H2P = MLP(dimH, num_words, [50], [relu, softmax])

        def fn(x, h):
            # One step: combine the current input with the previous hidden state.
            h = self.Z2H(self.x2Z(x) + self.H2Z(h))
            s = self.H2P(h)
            return [h, s]

        [self.H, self.S], _ = theano.scan(fn=fn, sequences=self.X,
                                          outputs_info=[self.H0, None],
                                          n_steps=self.X.shape[0])

        # softmax on a vector returns a (1, num_words) row, so S has shape
        # (n_steps, 1, num_words); idxs is flattened to a vector of int targets.
        self.cost = T.mean(T.nnet.categorical_crossentropy(self.S[:, 0, :], self.idxs.flatten()))
        #self.cost = T.mean(T.choose(self.idxs, -T.log(self.S[:,0,:])))
        self.params = [self.embeddings] + self.H2Z.params + self.x2Z.params + self.Z2H.params + self.H2P.params
        self.grads = T.grad(self.cost, self.params)

        # Plain SGD updates.
        self.lr = T.scalar()
        self.updates = [(param, param - self.lr * grad)
                        for param, grad in zip(self.params, self.grads)]
        self.train_fn = theano.function([self.idxs, self.lr], [self.cost],
                                        updates=self.updates, allow_input_downcast=True)
        self.f = theano.function([self.idxs], [self.H, self.S, self.cost],
                                 allow_input_downcast=True)
# Tiny smoke test: overfit a two-token sequence.
rnn = RNN(10, 10, 20)
idx = numpy.array([[1, 2]]).T
for i in xrange(1000):
    rnn.train_fn(idx, 0.1)
H, S, C = rnn.f(idx)
print S[0, 0, :]  # predicted distribution over the vocabulary at step 0
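# A minimal sampling sketch (an assumption, not part of the original gist):
# draw tokens from the model's per-step output distribution. It re-runs the
# whole prefix through rnn.f at every step, which is O(n^2) in the sequence
# length but keeps the example short.
def sample(rnn, seed, length=10):
    seq = list(seed)
    for _ in xrange(length):
        _, S, _ = rnn.f(numpy.array([seq]).T)
        p = S[-1, 0, :]           # output distribution at the final step
        p = p / p.sum()           # renormalize against floating-point drift
        seq.append(int(numpy.random.choice(len(p), p=p)))
    return seq

# e.g. sample(rnn, [1], length=5) extends the seed token with five sampled tokens.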