import numpy as np
import theano
import theano.tensor as T
import lasagne

np.random.seed(1234)


class TTLayer(lasagne.layers.Layer):
    """A fully connected layer with the weight matrix stored in the
    Tensor Train (TT) format [1].

    Parameters
    ----------
    incoming : a :class:`Layer` instance feeding into this layer.
    tt_input_shape : factorization of the input dimension; its product
        must equal the number of input neurons.
    tt_output_shape : factorization of the output dimension.
    tt_ranks : TT-ranks; one more element than ``tt_input_shape``.
    cores : initializer for the TT-cores (currently bypassed in favor of
        a TT-SVD of a random matrix, see ``__init__``).
    b : initializer for the biases, or ``None`` to disable them.
    nonlinearity : nonlinearity applied to the activation, or ``None``.

    References
    ----------
    .. [1] Tensorizing Neural Networks
           Alexander Novikov, Dmitry Podoprikhin, Anton Osokin, Dmitry Vetrov
           In Advances in Neural Information Processing Systems 28 (NIPS 2015)
    """

    def __init__(self, incoming, tt_input_shape, tt_output_shape, tt_ranks,
                 cores=lasagne.init.Normal(0.01), b=lasagne.init.Constant(0.),
                 nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
        super(TTLayer, self).__init__(incoming, **kwargs)
        self.nonlinearity = (lasagne.nonlinearities.identity
                             if nonlinearity is None else nonlinearity)
        np.random.seed(1234)  # deterministic core initialization
        num_inputs = int(np.prod(self.input_shape[1:]))
        tt_input_shape = np.array(tt_input_shape)
        tt_output_shape = np.array(tt_output_shape)
        tt_ranks = np.array(tt_ranks)
        if np.prod(tt_input_shape) != num_inputs:
            raise ValueError("The size of the input tensor (i.e. the product "
                             "of the elements in tt_input_shape) should "
                             "equal the number of input neurons %d." %
                             num_inputs)
        if tt_input_shape.shape[0] != tt_output_shape.shape[0]:
            raise ValueError("The number of input and output dimensions "
                             "should be the same.")
        if tt_ranks.shape[0] != tt_output_shape.shape[0] + 1:
            raise ValueError("The number of TT-ranks should be one more "
                             "than the number of dimensions.")
        self.tt_input_shape = tt_input_shape
        self.tt_output_shape = tt_output_shape
        self.tt_ranks = tt_ranks
        self.num_dim = tt_input_shape.shape[0]
        # Initialize the cores from the exact TT-SVD of a random dense
        # weight matrix instead of using `cores` or
        # `_generate_orthogonal_tt_cores` (kept below for reference).
        num_units = int(np.prod(tt_output_shape))
        W = np.random.rand(num_inputs, num_units)
        local_cores_arr = matrix_svd(W, tt_input_shape, tt_output_shape,
                                     tt_ranks)
        self.cores_arr = self.add_param(local_cores_arr,
                                        local_cores_arr.shape,
                                        name='cores_arr')
        if b is None:
            self.b = None
        else:
            self.b = self.add_param(b, (num_units,), name="b",
                                    regularizable=False)

    def get_output_for(self, input, **kwargs):
        # theano.scan doesn't work when the shape of intermediate results
        # changes over iterations (see
        # https://github.com/Theano/Theano/issues/2127), so we use a
        # Python `for` loop instead.
        res = input
        # TODO: it may be faster to precompute the indices in advance.
        core_arr_idx = 0
        for k in range(self.num_dim - 1, -1, -1):
            # res is of size o_{k+1} x ... x o_d x batch_size x i_1 x ... x i_{k-1} x i_k x r_{k+1}
            curr_shape = (self.tt_input_shape[k] * self.tt_ranks[k + 1],
                          self.tt_ranks[k] * self.tt_output_shape[k])
            curr_core = self.cores_arr[core_arr_idx:
                                       core_arr_idx + np.prod(curr_shape)]
            curr_core = curr_core.reshape(curr_shape)
            res = T.dot(res.reshape((-1, curr_shape[0])), curr_core)
            # res is of size o_{k+1} x ... x o_d x batch_size x i_1 x ... x i_{k-1} x r_k x o_k
            res = T.transpose(res.reshape((-1, self.tt_output_shape[k])))
            # res is of size o_k x o_{k+1} x ... x o_d x batch_size x i_1 x ... x i_{k-1} x r_k
            core_arr_idx += np.prod(curr_shape)
        # res is of size o_1 x ... x o_d x batch_size
        res = T.transpose(res.reshape((-1, input.shape[0])))
        # res is of size batch_size x (o_1 * ... * o_d)
        if self.b is not None:
            res = res + self.b.dimshuffle('x', 0)
        return self.nonlinearity(res)

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], np.prod(self.tt_output_shape))
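

# A minimal usage sketch (not part of the original gist): the factorizations
# below (4*8*8 = 256 in and out) and the TT-ranks are illustrative
# assumptions chosen so the shape checks in __init__ pass; any factorization
# with matching products would work. It is kept in comments because
# matrix_svd, which __init__ calls, is defined further down this file.
#
#     l_in = lasagne.layers.InputLayer((None, 256))
#     l_tt = TTLayer(l_in, tt_input_shape=[4, 8, 8],
#                    tt_output_shape=[4, 8, 8], tt_ranks=[1, 3, 3, 1])
#     x = T.matrix('x')
#     f = theano.function([x], lasagne.layers.get_output(l_tt, x))
#     out = f(np.random.rand(2, 256).astype(theano.config.floatX))
#     # out.shape == (2, 256), matching get_output_shape_for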


def _generate_orthogonal_tt_cores(input_shape, output_shape, ranks):
    # Generate a random TT-tensor with orthogonalized cores.
    np.random.seed(1234)
    input_shape = np.array(input_shape)
    output_shape = np.array(output_shape)
    ranks = np.array(ranks)
    cores_arr_len = np.sum(input_shape * output_shape *
                           ranks[1:] * ranks[:-1])
    cores_arr = lasagne.utils.floatX(np.zeros(cores_arr_len))
    cores_arr_idx = 0
    rv = 1
    for k in range(input_shape.shape[0]):
        shape = [ranks[k], input_shape[k], output_shape[k], ranks[k + 1]]
        tall_shape = (np.prod(shape[:3]), shape[3])
        curr_core = np.dot(rv, lasagne.random.get_rng().normal(
            0, 1, size=(shape[0], np.prod(shape[1:]))))
        curr_core = curr_core.reshape(tall_shape)
        if k < input_shape.shape[0] - 1:
            # Orthogonalize every core but the last one.
            curr_core, rv = np.linalg.qr(curr_core)
        cores_arr[cores_arr_idx:cores_arr_idx + curr_core.size] = \
            curr_core.flatten()
        cores_arr_idx += curr_core.size
    # TODO: use something more principled than this scaling heuristic.
    glorot_style = (np.prod(input_shape) *
                    np.prod(ranks)) ** (1.0 / input_shape.shape[0])
    return (0.1 / glorot_style) * lasagne.utils.floatX(cores_arr)
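
# Worked size example (illustrative assumption, not in the original gist):
# for input_shape = [4, 8, 8], output_shape = [4, 8, 8] and
# ranks = [1, 3, 3, 1], the flattened cores array above has
# sum(input_shape * output_shape * ranks[1:] * ranks[:-1])
#   = 4*4*3*1 + 8*8*3*3 + 8*8*1*3 = 48 + 576 + 192 = 816 entries,
# versus 256 * 256 = 65536 entries for the dense weight matrix.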


def matrix_svd(X, left_modes, right_modes, ranks):
    """TT-SVD for a matrix.

    Args:
        X: input matrix, numpy array float32
        left_modes: tt-left-modes, numpy array int32
        right_modes: tt-right-modes, numpy array int32
        ranks: tt-ranks, numpy array int32
    Returns:
        core: tt-cores array, numpy 1D array float32
    """
    X = np.array(X)
    left_modes = np.array(left_modes)
    right_modes = np.array(right_modes)
    ranks = np.array(ranks)
    c = X.copy()
    d = left_modes.size
    # View the matrix as a 2d-way tensor and interleave the left and right
    # modes so that TT-mode k becomes left_modes[k] * right_modes[k].
    c = np.reshape(c, np.concatenate((left_modes, right_modes)))
    order = np.repeat(np.arange(0, d), 2) + np.tile([0, d], d)
    c = np.transpose(c, axes=order)
    c = np.reshape(c, left_modes * right_modes)
    return svd(c, left_modes * right_modes, ranks)
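
# Worked example of the mode interleaving above (illustrative assumption,
# not in the original gist): for a 256 x 256 matrix with
# left_modes = [4, 8, 8] and right_modes = [4, 8, 8], the matrix is first
# reshaped to a tensor of shape (4, 8, 8, 4, 8, 8); the permutation
# order = [0, 3, 1, 4, 2, 5] interleaves it to (4, 4, 8, 8, 8, 8), which
# is then reshaped to modes left_modes * right_modes = (16, 64, 64)
# before being handed to svd().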


def svd(X, modes, ranks):
    """TT-SVD.

    Args:
        X: input array, numpy array float32
        modes: tt-modes, numpy array int32
        ranks: tt-ranks, numpy array int32
    Returns:
        core: tt-cores array, numpy 1D array float32
    """
    c = X.copy()
    d = modes.size
    core = np.zeros(np.sum(ranks[:-1] * modes * ranks[1:]), dtype='float32')
    pos = 0
    for i in range(0, d - 1):
        m = ranks[i] * modes[i]
        c = np.reshape(c, [m, -1])
        u, s, v = np.linalg.svd(c, full_matrices=False)
        # Truncate the factors to the prescribed TT-rank.
        u = u[:, 0:ranks[i + 1]]
        s = s[0:ranks[i + 1]]
        v = v[0:ranks[i + 1], :]
        core[pos:pos + ranks[i] * modes[i] * ranks[i + 1]] = u.ravel()
        pos += ranks[i] * modes[i] * ranks[i + 1]
        # Carry the remaining factor into the next iteration.
        c = np.dot(np.diag(s), v)
    # The last core absorbs whatever is left of the matrix.
    core[pos:pos + ranks[d - 1] * modes[d - 1] * ranks[d]] = c.ravel()
    return core
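

# Smoke test (illustrative, not part of the original gist): TT-decompose a
# small random matrix with full TT-ranks, so the TT-SVD is exact, and
# contract the cores back into a matrix to check the round trip. All shapes
# below are arbitrary assumptions.
if __name__ == '__main__':
    left_modes = np.array([2, 3])
    right_modes = np.array([2, 3])
    modes = left_modes * right_modes           # [4, 9]
    ranks = np.array([1, 4, 1])                # full rank: min(4, 9) = 4
    W = np.random.rand(6, 6).astype('float32')
    cores = matrix_svd(W, left_modes, right_modes, ranks)
    assert cores.size == np.sum(ranks[:-1] * modes * ranks[1:])

    # Contract the cores left to right into the full tensor.
    res = np.ones((1, 1), dtype='float32')     # r_0 = 1 "carry"
    pos = 0
    for k in range(modes.size):
        core = cores[pos:pos + ranks[k] * modes[k] * ranks[k + 1]]
        pos += core.size
        core = core.reshape(ranks[k], modes[k] * ranks[k + 1])
        res = np.dot(res, core).reshape(-1, ranks[k + 1])
    # Undo the mode interleaving performed by matrix_svd.
    d = left_modes.size
    full = res.reshape(np.stack([left_modes, right_modes], axis=1).ravel())
    inv_order = np.argsort(np.repeat(np.arange(0, d), 2) + np.tile([0, d], d))
    full = np.transpose(full, axes=inv_order)
    W_rec = full.reshape(np.prod(left_modes), np.prod(right_modes))
    print('max reconstruction error:', np.abs(W - W_rec).max())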