Skip to content

Instantly share code, notes, and snippets.

@shashankg7
Last active March 31, 2016 08:15
Show Gist options
  • Save shashankg7/aec2303803e7b39b150a9f78cb59db09 to your computer and use it in GitHub Desktop.
Save shashankg7/aec2303803e7b39b150a9f78cb59db09 to your computer and use it in GitHub Desktop.
Word Embedding models (only theano code, for reference)
import numpy as np
import theano
from theano import tensor as T
rng = np.random
class Autoencoder(object):
    """Two-layer linear autoencoder trained one column vector at a time.

    The encoder ``W1`` has shape ``(reduced_dims, maxnum)`` and the decoder
    ``W2`` has shape ``(maxnum, reduced_dims)``; both start as Gaussian noise
    scaled by 0.1.  The input lives in a shared variable of shape
    ``(maxnum, 1)``, so the compiled training function takes no arguments and
    reads whatever was last stored in ``self.inputs``.

    Parameters:
        maxnum: length of the input column vector.
        reduced_dims: length of the embedding produced by the encoder.
        learnrate: step size of the gradient-descent update.
    """

    def __init__(self, maxnum, reduced_dims, learnrate=0.4):
        # Element-wise bound applied to both weight gradients.
        self.threshold = 1e-2
        # Input variable (equivalent to dummyword in original implementation)
        self.inputs = theano.shared(np.zeros((maxnum, 1), dtype=np.float32))
        self.W1 = theano.shared(
            (rng.randn(reduced_dims, maxnum) * 0.1).astype(
                theano.config.floatX),
            name='W1')
        self.W2 = theano.shared(
            (rng.randn(maxnum, reduced_dims) * 0.1).astype(
                theano.config.floatX),
            name='W2')

        # Encode, decode, and measure the summed squared reconstruction error.
        self.output = T.dot(self.W1, self.inputs)
        self.recons = T.dot(self.W2, self.output)
        self.totloss = T.sum((self.inputs - self.recons) ** 2)

        # Gradients w.r.t. the parameters, with every entry clipped to the
        # range (-threshold, threshold).
        self.W1_grad = T.clip(T.grad(self.totloss, self.W1),
                              -1 * self.threshold, self.threshold)
        self.W2_grad = T.clip(T.grad(self.totloss, self.W2),
                              -1 * self.threshold, self.threshold)

        self.updates = [(self.W1, self.W1 - learnrate * self.W1_grad),
                        (self.W2, self.W2 - learnrate * self.W2_grad)]
        self.train = theano.function([], self.totloss, updates=self.updates,
                                     allow_input_downcast=True)

    def _as_input(self, wordvec):
        """Convert *wordvec* to the float32 array stored in ``self.inputs``."""
        return np.asarray(wordvec, dtype=np.float32)

    def trainonone(self, wordvec):
        """Run one clipped gradient step on *wordvec* and print the loss.

        The loss returned by the compiled function is kept in ``self.loss``.
        """
        self.inputs.set_value(self._as_input(wordvec))
        self.loss = self.train()
        # Pre-formatted so the emitted text is the same under the Python 2
        # print statement and the Python 3 print function.
        print("Loss incurred :  %s" % (self.loss,))

    def getoutput(self, wordvec):
        """Return the embedding of *wordvec*, i.e. ``W1 * input``.

        The vector goes through the same float32 conversion as in
        ``trainonone`` so both entry points accept the same inputs.
        """
        self.inputs.set_value(self._as_input(wordvec))
        genembedding = self.output.eval()
        return genembedding
'''
Defines glove model and performs one mini-batch SGD update in theano.
'''
from theano import tensor as T
import theano
import numpy as np
class glove(object):
    """GloVe-style embedding model with one compiled minibatch update.

    Two ``(vocab_size, dim)`` embedding tables, ``W`` and ``W1``, are
    initialised uniformly in ``[0, 1/dim)``.  ``gW`` and ``gW1`` hold the
    running sums of squared gradients (initialised to ones) that scale the
    per-row step size, AdaGrad-style.

    Parameters:
        vocab_size: number of rows in each embedding table.
        dim: embedding dimensionality.
        lr: base learning rate, divided by the square root of the
            accumulated squared gradients.
    """

    def __init__(self, vocab_size, dim, lr=0.05):
        float_x = theano.config.floatX
        shape = (vocab_size, dim)

        W = np.asarray(np.random.rand(*shape), dtype=float_x) / float(dim)
        W1 = np.asarray(np.random.rand(*shape), dtype=float_x) / float(dim)
        self.W = theano.shared(W, name='W', borrow=True)
        self.W1 = theano.shared(W1, name='W1', borrow=True)

        # Squared-gradient accumulators; starting at one keeps the first
        # division by sqrt(accumulator) well defined.
        gW = np.ones(shape, dtype=float_x)
        gW1 = np.ones(shape, dtype=float_x)
        self.gW = theano.shared(gW, name='gW', borrow=True)
        self.gW1 = theano.shared(gW1, name='gW1', borrow=True)

        # Symbolic minibatch: targets, per-pair weights, and row indices
        # into each table.
        X = T.vector()
        fX = T.vector()
        ind_W = T.ivector()
        ind_W1 = T.ivector()

        w = self.W[ind_W, :]
        w1 = self.W1[ind_W1, :]
        # Weighted squared error between the row-wise dot product and X.
        cost = T.sum(fX * ((T.sum(w * w1, axis=1) - X) ** 2))

        # Clip each gradient on its own instead of handing T.clip a Python
        # list and relying on it being stacked into one tensor.
        grad_w, grad_w1 = T.grad(cost, [w, w1])
        grad_w = T.clip(grad_w, -5.0, 5.0)
        grad_w1 = T.clip(grad_w1, -5.0, 5.0)

        # All four updates read the pre-update shared values, so the step
        # is scaled by the accumulator from before this minibatch.
        updates = [
            (self.gW, T.inc_subtensor(self.gW[ind_W, :], grad_w ** 2)),
            (self.gW1, T.inc_subtensor(self.gW1[ind_W1, :], grad_w1 ** 2)),
            (self.W, T.inc_subtensor(
                self.W[ind_W, :],
                -(lr / T.sqrt(self.gW[ind_W, :])) * grad_w)),
            (self.W1, T.inc_subtensor(
                self.W1[ind_W1, :],
                -(lr / T.sqrt(self.gW1[ind_W1, :])) * grad_w1)),
        ]
        # allow_input_downcast matches the Autoencoder's compiled function
        # and lets callers pass int64 / float64 arrays.
        self.cost_fn = theano.function(inputs=[ind_W, ind_W1, X, fX],
                                       outputs=cost, updates=updates,
                                       allow_input_downcast=True)

    def sgd(self, indw, indw1, X, fX):
        '''
        Performs one minibatch update and returns its cost.

        indw / indw1 are the row indices into W / W1, X the target values
        and fX the per-pair weights; all four are 1-D and index-aligned.
        '''
        return self.cost_fn(indw, indw1, X, fX)

    def save_params(self, path='lookup'):
        '''
        Saves the word embedding lookup matrix (W + W1) to file.

        path is handed to ``np.save``; the default keeps the original
        output name 'lookup'.
        '''
        W = self.W.get_value() + self.W1.get_value()
        np.save(path, W)
# Training driver for the glove model.
# NOTE(review): this fragment reads `self.dim`, `self.n_epochs`,
# `self.minibatch_size`, `coocur` and `vocab_size`, none of which are defined
# in this listing -- presumably it is the body of a trainer method; confirm
# against the full source before relying on it.
nnz = coocur.coocur_mat.nonzero()
model = glove(vocab_size, self.dim)

# nnz has i,j indices of non-zero entries; pack them as rows so one shuffle
# keeps each (i, j) pair together.
nz = np.zeros((nnz[0].shape[0], 2))
nz[:, 0] = nnz[0]
nz[:, 1] = nnz[1]
np.random.shuffle(nz)

# GloVe weighting function: f(x) = (x / x_max) ** alpha below the cutoff,
# and 1 at or above it.
x_max = 100.0
alpha = 0.75

print("Starting training, brace yourself")
for epoch in range(self.n_epochs):
    for start in range(0, nnz[0].shape[0], self.minibatch_size):
        stop = start + self.minibatch_size
        indw = np.asarray(nz[start:stop, 0], dtype=np.int32)
        indw1 = np.asarray(nz[start:stop, 1], dtype=np.int32)
        batch_size = indw.shape[0]
        X = np.asarray(coocur.coocur_mat[indw, indw1].todense(),
                       dtype=theano.config.floatX).reshape(batch_size,)

        # Down-weight rare pairs and cap the weight of frequent ones at 1.
        # The original applied the power law above the cutoff instead,
        # which let the weight grow without bound.
        fX = np.ones_like(X)
        rare = X < x_max
        fX[rare] = (X[rare] / x_max) ** alpha

        # The model is fitted to log co-occurrence counts.
        X = np.log(X)
        cost = model.sgd(indw, indw1, X, fX)
    # NOTE(review): reports the cost of the last minibatch only; placement
    # at epoch level is inferred from the message text -- confirm.
    print("Cost in epoch %d is %f" % (epoch, cost))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment