import numpy as np
import theano
import theano.tensor as T
import lasagne

np.random.seed(1234)


class TTLayer(lasagne.layers.Layer):
    """
    A fully connected layer whose weight matrix is stored in the
    Tensor Train (TT) format.

    Parameters
    ----------
    incoming : a :class:`Layer` instance
        The layer feeding into this layer.
    tt_input_shape : iterable of int
        Factorization of the number of input units; its product must equal
        the flattened input dimension.
    tt_output_shape : iterable of int
        Factorization of the number of output units; must have the same
        length as tt_input_shape.
    tt_ranks : iterable of int
        TT-ranks of the weight tensor; must have one more element than
        tt_input_shape.
    cores : initializer for the flattened array of TT-cores (currently
        unused; the cores are initialized from a TT-SVD of a random
        matrix, see __init__).
    b : initializer for the biases, or None to omit the bias term.
    nonlinearity : callable or None
        The nonlinearity applied to the layer output (default: rectify).

    References
    ----------
    .. [1] Alexander Novikov, Dmitry Podoprikhin, Anton Osokin,
           Dmitry Vetrov. "Tensorizing Neural Networks".
           In Advances in Neural Information Processing Systems 28
           (NIPS 2015).
    """
    def __init__(self, incoming, tt_input_shape, tt_output_shape, tt_ranks,
                 cores=lasagne.init.Normal(0.01), b=lasagne.init.Constant(0.),
                 nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
        super(TTLayer, self).__init__(incoming, **kwargs)
        self.nonlinearity = (lasagne.nonlinearities.identity
                             if nonlinearity is None else nonlinearity)
        np.random.seed(1234)  # fixed seed for reproducibility
        num_inputs = int(np.prod(self.input_shape[1:]))
        tt_input_shape = np.array(tt_input_shape)
        tt_output_shape = np.array(tt_output_shape)
        tt_ranks = np.array(tt_ranks)
        if np.prod(tt_input_shape) != num_inputs:
            raise ValueError("The size of the input tensor (i.e. product "
                             "of the elements in tt_input_shape) should "
                             "equal the number of input neurons %d." %
                             (num_inputs,))
        if tt_input_shape.shape[0] != tt_output_shape.shape[0]:
            raise ValueError("The number of input and output dimensions "
                             "should be the same.")
        if tt_ranks.shape[0] != tt_output_shape.shape[0] + 1:
            raise ValueError("The number of TT-ranks should be "
                             "1 + the number of dimensions.")
        self.tt_input_shape = tt_input_shape
        self.tt_output_shape = tt_output_shape
        self.tt_ranks = tt_ranks
        self.num_dim = tt_input_shape.shape[0]
        # local_cores_arr = _generate_orthogonal_tt_cores(tt_input_shape,
        #                                                 tt_output_shape,
        #                                                 tt_ranks)
        # Initialize the cores from the TT-SVD of a random dense matrix
        # (the `cores` initializer argument is currently unused). Note that
        # matrix_svd stores each core with layout (r_k, i_k * o_k, r_{k+1}),
        # while get_output_for reslices the flat array under a different
        # layout; the total lengths match, so this only affects the init.
        W = np.random.rand(np.prod(tt_input_shape), np.prod(tt_output_shape))
        local_cores_arr = matrix_svd(W, tt_input_shape, tt_output_shape,
                                     tt_ranks)
        self.cores_arr = self.add_param(local_cores_arr,
                                        local_cores_arr.shape,
                                        name='cores_arr')
        if b is None:
            self.b = None
        else:
            num_units = np.prod(tt_output_shape)
            self.b = self.add_param(b, (num_units,), name="b",
                                    regularizable=False)

    def get_output_for(self, input, **kwargs):
        # theano.scan doesn't work when the intermediate results' shape
        # changes over iterations (see
        # https://github.com/Theano/Theano/issues/2127), so we use a
        # plain Python `for` loop instead.
        res = input
        # TODO: it may be faster to precompute the indices in advance.
        core_arr_idx = 0
        for k in range(self.num_dim - 1, -1, -1):
            # res is of size o_k+1 x ... x o_d x batch_size x i_1 x ... x i_k-1 x i_k x r_k+1
            curr_shape = (self.tt_input_shape[k] * self.tt_ranks[k + 1],
                          self.tt_ranks[k] * self.tt_output_shape[k])
            # The shapes are known at graph-construction time, so the slice
            # bounds can be computed with numpy rather than symbolically.
            curr_core = self.cores_arr[core_arr_idx:
                                       core_arr_idx + np.prod(curr_shape)]
            curr_core = curr_core.reshape(curr_shape)
            res = T.dot(res.reshape((-1, curr_shape[0])), curr_core)
            # res is of size o_k+1 x ... x o_d x batch_size x i_1 x ... x i_k-1 x r_k x o_k
            res = T.transpose(res.reshape((-1, self.tt_output_shape[k])))
            # res is of size o_k x o_k+1 x ... x o_d x batch_size x i_1 x ... x i_k-1 x r_k
            core_arr_idx += np.prod(curr_shape)
        # res is of size o_1 x ... x o_d x batch_size
        res = T.transpose(res.reshape((-1, input.shape[0])))
        # res is of size batch_size x o_1 x ... x o_d
        if self.b is not None:
            res = res + self.b.dimshuffle('x', 0)
        return self.nonlinearity(res)

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], np.prod(self.tt_output_shape))
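

# --- Usage sketch (illustrative, not part of the original gist) -------------
# A minimal example of how this layer might be wired into a Lasagne network,
# assuming a 256-dimensional input factored as 4*8*8 on both sides with
# TT-ranks (1, 3, 3, 1). The function and variable names are hypothetical.
def _example_build_tt_network(batch_size=None):
    input_var = T.matrix('inputs')
    l_in = lasagne.layers.InputLayer((batch_size, 256), input_var=input_var)
    # prod(tt_input_shape) = 256 matches the input dimension, and
    # len(tt_ranks) = len(tt_input_shape) + 1, as __init__ requires.
    l_tt = TTLayer(l_in,
                   tt_input_shape=[4, 8, 8],
                   tt_output_shape=[4, 8, 8],
                   tt_ranks=[1, 3, 3, 1])
    l_out = lasagne.layers.DenseLayer(
        l_tt, num_units=10, nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
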

def _generate_orthogonal_tt_cores(input_shape, output_shape, ranks):
    """Generate a random orthogonalized TT-tensor (currently unused)."""
    input_shape = np.array(input_shape)
    output_shape = np.array(output_shape)
    ranks = np.array(ranks)
    cores_arr_len = np.sum(input_shape * output_shape *
                           ranks[1:] * ranks[:-1])
    cores_arr = lasagne.utils.floatX(np.zeros(cores_arr_len))
    cores_arr_idx = 0
    rv = 1
    for k in range(input_shape.shape[0]):
        shape = [ranks[k], input_shape[k], output_shape[k], ranks[k + 1]]
        tall_shape = (np.prod(shape[:3]), shape[3])
        # Carry the R factor of the previous QR into the next core so the
        # chain of cores stays orthogonal.
        curr_core = np.dot(rv, lasagne.random.get_rng().normal(
            0, 1, size=(shape[0], np.prod(shape[1:]))))
        curr_core = curr_core.reshape(tall_shape)
        if k < input_shape.shape[0] - 1:
            curr_core, rv = np.linalg.qr(curr_core)
        cores_arr[cores_arr_idx:cores_arr_idx + curr_core.size] = \
            curr_core.flatten()
        cores_arr_idx += curr_core.size
    # TODO: use something reasonable instead of this dirty hack.
    glorot_style = (np.prod(input_shape) *
                    np.prod(ranks)) ** (1.0 / input_shape.shape[0])
    return (0.1 / glorot_style) * lasagne.utils.floatX(cores_arr)
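

# --- Parameter-count sketch (illustrative, not from the original gist) ------
# The TT parametrization stores sum_k r_k * i_k * o_k * r_{k+1} numbers
# instead of prod(i) * prod(o) for a dense weight matrix. For example, with
# tt_input_shape = tt_output_shape = [4, 8, 8] and tt_ranks = [1, 3, 3, 1]:
#   1*4*4*3 + 3*8*8*3 + 3*8*8*1 = 48 + 576 + 192 = 816
# parameters, versus 256 * 256 = 65536 for the dense layer (~80x smaller).
def _tt_param_count(tt_input_shape, tt_output_shape, tt_ranks):
    tt_input_shape = np.array(tt_input_shape)
    tt_output_shape = np.array(tt_output_shape)
    tt_ranks = np.array(tt_ranks)
    return int(np.sum(tt_input_shape * tt_output_shape *
                      tt_ranks[:-1] * tt_ranks[1:]))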


def matrix_svd(X, left_modes, right_modes, ranks):
    """TT-SVD for a matrix.

    Args:
        X: input matrix, numpy array float32
        left_modes: TT left modes, numpy array int32
        right_modes: TT right modes, numpy array int32
        ranks: TT-ranks, numpy array int32
    Returns:
        core: TT-cores array, numpy 1D array float32
    """
    X = np.array(X)
    left_modes = np.array(left_modes)
    right_modes = np.array(right_modes)
    ranks = np.array(ranks)
    c = X.copy()
    d = left_modes.size
    c = np.reshape(c, np.concatenate((left_modes, right_modes)))
    # Interleave the row and column modes as (l_1, r_1, ..., l_d, r_d),
    # then merge each (l_i, r_i) pair into a single mode of size l_i * r_i.
    order = np.repeat(np.arange(0, d), 2) + np.tile([0, d], d)
    c = np.transpose(c, axes=order)
    c = np.reshape(c, left_modes * right_modes)
    return svd(c, left_modes * right_modes, ranks)


def svd(X, modes, ranks):
    """TT-SVD.

    Args:
        X: input array, numpy array float32
        modes: TT-modes, numpy array int32
        ranks: TT-ranks, numpy array int32
    Returns:
        core: TT-cores array, numpy 1D array float32
    """
    c = X.copy()
    d = modes.size
    core = np.zeros(np.sum(ranks[:-1] * modes * ranks[1:]), dtype='float32')
    pos = 0
    for i in range(0, d - 1):
        # Unfold the remainder, take a rank-truncated SVD, keep the left
        # factor as the next core, and fold s * v back into the remainder.
        m = ranks[i] * modes[i]
        c = np.reshape(c, [m, -1])
        u, s, v = np.linalg.svd(c, full_matrices=False)
        u = u[:, 0:ranks[i + 1]]
        s = s[0:ranks[i + 1]]
        v = v[0:ranks[i + 1], :]
        core[pos:pos + ranks[i] * modes[i] * ranks[i + 1]] = u.ravel()
        pos += ranks[i] * modes[i] * ranks[i + 1]
        c = np.dot(np.diag(s), v)
    # The remainder becomes the last core.
    core[pos:pos + ranks[d - 1] * modes[d - 1] * ranks[d]] = c.ravel()
    return core
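

# --- Round-trip sanity check (illustrative, not from the original gist) -----
# With maximal TT-ranks, TT-SVD should reproduce the input matrix up to
# float32 rounding. `tt_reconstruct` contracts the flattened cores back into
# a full tensor; the __main__ block then checks matrix_svd on a small matrix.
def tt_reconstruct(core, modes, ranks):
    """Contract a flattened TT-core array back into a full tensor."""
    modes = np.array(modes)
    ranks = np.array(ranks)
    pos = 0
    res = np.ones((1, 1))
    # After step i, res has shape (prod(modes[:i+1]), ranks[i+1]).
    for i in range(modes.size):
        n = ranks[i] * modes[i] * ranks[i + 1]
        g = core[pos:pos + n].reshape(ranks[i], modes[i] * ranks[i + 1])
        pos += n
        res = np.dot(res, g).reshape(-1, ranks[i + 1])
    return res.reshape(modes)


if __name__ == "__main__":
    left_modes = np.array([4, 4])
    right_modes = np.array([4, 4])
    full_ranks = np.array([1, 16, 1])  # maximal rank at the single cut
    X = np.random.rand(16, 16).astype('float32')
    core = matrix_svd(X, left_modes, right_modes, full_ranks)
    full = tt_reconstruct(core, left_modes * right_modes, full_ranks)
    # Undo matrix_svd's mode interleaving to compare against X directly.
    d = left_modes.size
    order = np.repeat(np.arange(0, d), 2) + np.tile([0, d], d)
    full = full.reshape(np.column_stack((left_modes, right_modes)).ravel())
    full = np.transpose(full, axes=np.argsort(order))
    full = full.reshape(np.prod(left_modes), np.prod(right_modes))
    print('max abs reconstruction error:', np.max(np.abs(full - X)))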