Sequence-to-sequence prediction using a raw RNN versus the native seq2seq module of TensorFlow
# These two classes implement the same functionality in two different ways.
# They have not been thoroughly tested for exact equivalence, but they produce similar results.
# Unfortunately, using the native seq2seq module does not yield any speed improvement,
# so for now the choice between the two is purely a matter of style.
################################ Seq2Seq with Raw RNN ##############################################
from __future__ import division
import numpy as np
import tensorflow as tf
from cost_functions.huber_loss import huberLoss
from data_providers.price_history_seq2seq_data_provider import PriceHistorySeq2SeqDataProvider
from mylibs.jupyter_notebook_helper import DynStats, getRunTime
# from model_selection.cross_validator import CrossValidator
from interfaces.neural_net_model_interface import NeuralNetModelInterface
from tensorflow.contrib import rnn
from collections import OrderedDict
from mylibs.py_helper import merge_dicts
from mylibs.tf_helper import tfMSE


class PriceHistorySeq2Seq(NeuralNetModelInterface):
    FEATURE_LEN = 1
    TARGET_FEATURE_LEN = 1

    def __init__(self, rng, dtype, config):
        # super(ShiftFunc, self).__init__(random_state=rng, data_provider_class=ProductPairsDataProvider)
        super(PriceHistorySeq2Seq, self).__init__()
        self.rng = rng
        self.dtype = dtype
        self.config = config
        self.train_data = None
        self.valid_data = None
        self.init = None
        self.error = None
        self.inputs = None
        self.predictions = None
        self.targets = None
        self.train_step = None
        # an end-of-sequence token is necessary for machine translation models; not sure it is needed for our model
        self.EOS_TOKEN_LEN = 1

    class COST_FUNCS(object):
        HUBER_LOSS = "huberloss"
        MSE = 'mse'

    class RNN_CELLS(object):
        BASIC_RNN = tf.contrib.rnn.BasicRNNCell  # "basic_rnn"
        GRU = tf.contrib.rnn.GRUCell  # "gru"

    def run(self, npz_path, epochs, batch_size, num_units, input_len, target_len,
            eos_token=PriceHistorySeq2SeqDataProvider.EOS_TOKEN_DEFAULT,
            preds_gather_enabled=True,
            cost_func=COST_FUNCS.HUBER_LOSS, rnn_cell=RNN_CELLS.BASIC_RNN):
        graph = self.getGraph(batch_size=batch_size, verbose=False, num_units=num_units, input_len=input_len,
                              rnn_cell=rnn_cell, target_len=target_len, cost_func=cost_func, eos_token=eos_token)
        train_data = PriceHistorySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size, rng=self.rng,
                                                     eos_token=eos_token)
        preds_dp = PriceHistorySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size, rng=self.rng,
                                                   shuffle_order=False, eos_token=eos_token,
                                                   ) if preds_gather_enabled else None
        stats = self.train_validate(train_data=train_data, valid_data=None, graph=graph, epochs=epochs,
                                    preds_gather_enabled=preds_gather_enabled, preds_dp=preds_dp,
                                    batch_size=batch_size)
        return stats

    def train_validate(self, train_data, valid_data, **kwargs):
        graph = kwargs['graph']
        epochs = kwargs['epochs']
        batch_size = kwargs['batch_size']
        verbose = kwargs.get('verbose', True)
        preds_dp = kwargs.get('preds_dp', None)
        preds_gather_enabled = kwargs.get('preds_gather_enabled', True)
        if verbose:
            print "epochs: %d" % epochs
        with tf.Session(graph=graph, config=self.config) as sess:
            sess.run(self.init)  # sess.run(tf.initialize_all_variables())
            dynStats = DynStats()
            for epoch in range(epochs):
                train_error, runTime = getRunTime(
                    lambda:
                    self.trainEpoch(
                        sess=sess,
                        data_provider=train_data,
                    )
                )
                # if (epoch + 1) % 1 == 0:
                #     valid_error = validateEpoch(
                #         inputs=self.inputs,
                #         targets=self.targets,
                #         sess=sess,
                #         valid_data=valid_data,
                #         error=self.error,
                #         extraFeedDict={self.is_training: False},
                #         # keep_prob_keys=[self.keep_prob_input, self.keep_prob_hidden]
                #     )
                if verbose:
                    # print 'EndEpoch%02d(%.3f secs):err(train)=%.4f,acc(train)=%.2f,err(valid)=%.2f,acc(valid)=%.2f, ' % \
                    #     (epoch + 1, runTime, train_error, train_accuracy, valid_error, valid_accuracy)
                    print 'End Epoch %02d (%.3f secs): err(train) = %.4f' % (epoch + 1, runTime, train_error)
                dynStats.gatherStats(train_error)
            predictions_dict = self.getPredictions(batch_size=batch_size, data_provider=preds_dp, sess=sess)[
                0] if preds_gather_enabled else None
            if preds_gather_enabled:
                return dynStats, predictions_dict
            else:
                return dynStats

    def getPredictions(self, sess, data_provider, batch_size, extraFeedDict=None):
        if extraFeedDict is None:
            extraFeedDict = {}
        assert data_provider.data_len % batch_size == 0
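        # the provider must yield only complete batches, since predictions are written
        # back into all_predictions one batch-sized slice at a time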
        total_error = 0.
        instances_order = data_provider.current_order
        target_len = data_provider.targets.shape[1]
        all_predictions = np.zeros(shape=(data_provider.data_len, target_len))
        for inst_ind, (input_batch, target_batch) in enumerate(data_provider):
            cur_error, cur_preds = sess.run(
                [self.error, self.predictions],
                feed_dict=merge_dicts({self.inputs: input_batch,
                                       self.targets: target_batch,
                                       }, extraFeedDict))
            assert np.all(instances_order == data_provider.current_order)
            all_predictions[inst_ind * batch_size: (inst_ind + 1) * batch_size, :] = cur_preds[:, :-1]
            total_error += cur_error
        total_error /= data_provider.num_batches
        assert np.all(all_predictions != 0)  # all predictions are expected to be something other than exactly zero
        preds_dict = OrderedDict(zip(instances_order, all_predictions))
        return preds_dict, total_error

    def trainEpoch(self, sess, data_provider, extraFeedDict=None):
        if extraFeedDict is None:
            extraFeedDict = {}
        train_error = 0.
        num_batches = data_provider.num_batches
        for step, (input_batch, target_batch) in enumerate(data_provider):
            feed_dic = merge_dicts({self.inputs: input_batch,
                                    self.targets: target_batch,
                                    }, extraFeedDict)
            _, batch_error = sess.run([self.train_step, self.error], feed_dict=feed_dic)
            train_error += batch_error
        train_error /= num_batches
        return train_error

    def getGraph(self,
                 # TODO in this version we build the full length; later we will improve it (truncated backprop length)
                 batch_size,
                 num_units,
                 input_len,
                 target_len,
                 eos_token=PriceHistorySeq2SeqDataProvider.EOS_TOKEN_DEFAULT,
                 rnn_cell=RNN_CELLS.BASIC_RNN,
                 cost_func=COST_FUNCS.HUBER_LOSS,
                 learningRate=1e-3,  # default of Adam is 1e-3
                 # lamda2=1e-2,
                 verbose=True):
        # momentum = 0.5
        # tf.reset_default_graph()  # kind of a redundant statement
        if verbose:
            # print "lamda2: %f" % lamda2
            print "learning rate: %f" % learningRate
        output_seq_len = target_len + self.EOS_TOKEN_LEN
        graph = tf.Graph()  # create new graph
        with graph.as_default():
            with tf.name_scope('data'):
                inputs = tf.placeholder(dtype=self.dtype,
                                        shape=(batch_size, input_len, self.FEATURE_LEN), name="inputs")
                targets = tf.placeholder(dtype=self.dtype, shape=(batch_size, output_seq_len), name="targets")
                # temporary, to make things easy for ourselves the first time
                # decoder_inputs = tf.placeholder(dtype=self.dtype,
                #                                 shape=(batch_size, output_seq_len,
                #                                        self.TARGET_FEATURE_LEN))
            with tf.name_scope('inputs'):
                # unstack the matrix into a list of tensors, one per time step
                inputs_series = tf.unstack(inputs, axis=1)
                if verbose:
                    print len(inputs_series)
                    print inputs_series[0]  # shape: (batch_size, FEATURE_LEN)
            with tf.name_scope('encoder_rnn_layer'):
                # we do not really care about the encoder outputs, only its final state;
                # the encoder consumes all the input to get a sense of the trend of the price history
                _, encoder_final_state = rnn.static_rnn(cell=rnn_cell(num_units=num_units),
                                                        inputs=inputs_series,
                                                        initial_state=None,
                                                        # TODO when using truncated backprop this should not be zero
                                                        dtype=self.dtype)
                if verbose:
                    print encoder_final_state
            # BAD idea:
            # decoder_outputs = []
            # inout = [eos_token]
            # for ii in range(output_seq_len):
            #     if verbose:
            #         print "rnn: {}".format(ii)
            #     with tf.variable_scope('decoder_rnn_layer_{}'.format(ii)):
            #         # note that we use the same number of units for the decoder here
            #         inout, _ = rnn.static_rnn(cell=rnn_cell(num_units=num_units),
            #                                   inputs=inout,
            #                                   initial_state=encoder_final_state,
            #                                   dtype=self.dtype)
            #     decoder_outputs += inout
            with tf.variable_scope('decoder_rnn_layer'):
                eos_token_tensor = tf.constant(np.ones(shape=(batch_size, 1)) * eos_token,
                                               dtype=tf.float32, name='eos_token')
                WW = tf.Variable(self.rng.randn(num_units, self.TARGET_FEATURE_LEN), dtype=self.dtype, name='weights')
                bb = tf.Variable(np.zeros(self.TARGET_FEATURE_LEN), dtype=self.dtype, name='bias')
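                # tf.nn.raw_rnn drives the decoder through loop_fn (built below): at time 0 the
                # decoder is fed the EOS token as input and the encoder's final state, and at every
                # later step the previous cell output, projected through the shared WW/bb, becomes
                # the next input (autoregressive decoding)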
                decoder_output_tensor_array, _, _ = tf.nn.raw_rnn(cell=rnn_cell(num_units=num_units),
                                                                  loop_fn=self.get_loop_fn(
                                                                      encoder_final_state=encoder_final_state,
                                                                      eos_token=eos_token_tensor, batch_size=batch_size,
                                                                      WW=WW, bb=bb, static_seq_len=output_seq_len,
                                                                  ))
                # del decoder_final_state, decoder_final_loop_state  # not interesting
                if verbose:
                    # print len(decoder_outputs)
                    # print decoder_outputs[0]
                    print decoder_output_tensor_array
                # https://www.tensorflow.org/api_docs/python/tf/TensorArray
                decoder_out_tensor = decoder_output_tensor_array.stack(name='decoder_out_tensor')
                if verbose:
                    print decoder_out_tensor
            with tf.name_scope('readout_layer'):
                # just grab the dimensions
                # decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(tf.shape(decoder_out_tensor))
                decoder_dim = decoder_out_tensor.get_shape()[-1].value  # last dimension's value
                decoder_outputs_flat = tf.reshape(decoder_out_tensor, (-1, decoder_dim))
                # TODO note that we reuse exactly the same weights used above inside the RNN loop (not sure this is best)
                readouts = tf.add(tf.matmul(decoder_outputs_flat, WW), bb, "readouts")
                if verbose:
                    print readouts
            with tf.name_scope('predictions'):
                # on purpose, output_seq_len goes first and batch size goes last (we omit the target feature len
                # because it is 1), since raw_rnn stacks outputs on top of each other with sequence length first
                predictions_transposed = tf.reshape(readouts, shape=(output_seq_len, batch_size))
                predictions = tf.transpose(predictions_transposed)
                # decoder_logits = tf.reshape(decoder_logits_flat, (decoder_max_steps, decoder_batch_size, vocab_size))
                # https://github.com/ematvey/tensorflow-seq2seq-tutorials/blob/master/2-seq2seq-advanced.ipynb
                if verbose:
                    print predictions_transposed
                    print predictions
            with tf.name_scope('error'):
                if cost_func == self.COST_FUNCS.HUBER_LOSS:
                    losses = huberLoss(y_true=targets, y_pred=predictions)  # both have shape: (batch_size, output_seq_len)
                elif cost_func == self.COST_FUNCS.MSE:
                    losses = tfMSE(outputs=predictions, targets=targets)
                else:
                    raise Exception("invalid or unsupported cost function")
                if verbose:
                    print losses
                # fix the error to exclude the EOS position from the error calculation
                mask = np.ones(shape=losses.get_shape())
                mask[:, -1:] = 0
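                # e.g. with target_len = 3 (so output_seq_len = 4) each row of the mask is
                # [1., 1., 1., 0.], zeroing out the loss at the trailing EOS position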
                losses = losses * tf.constant(mask, dtype=tf.float32)
                error = tf.reduce_mean(losses)
                if verbose:
                    print error
            with tf.name_scope('training_step'):
                train_step = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(error)
            init = tf.global_variables_initializer()
        self.init = init
        self.inputs = inputs
        self.targets = targets
        self.error = error
        # self.is_training = is_training
        self.train_step = train_step
        self.predictions = predictions
        # self.keep_prob_input = keep_prob_input
        # self.keep_prob_hidden = keep_prob_hidden
        return graph

    def get_loop_fn(self, encoder_final_state, eos_token, batch_size, WW, bb, static_seq_len):
        def loop_fn(time, previous_output, previous_state, previous_loop_state):
            # inputs: time, previous_cell_output, previous_cell_state, previous_loop_state
            # outputs: elements_finished, input, cell_state, output, loop_state
            if previous_state is None:  # time == 0
                assert previous_output is None
                return self.loop_fn_initial(encoder_final_state=encoder_final_state, eos_token=eos_token,
                                            batch_size=batch_size)
            else:
                return self.loop_fn_transition(time=time, previous_cell_output=previous_output,
                                               previous_cell_state=previous_state,
                                               batch_size=batch_size, WW=WW, bb=bb, static_seq_len=static_seq_len)

        return loop_fn

    def loop_fn_initial(self, encoder_final_state, eos_token, batch_size):
        # https://www.tensorflow.org/api_docs/python/tf/nn/raw_rnn
        # here we have a static-RNN case, so the full length is always taken into account;
        # I guess the finished flags are therefore always False
        # from the documentation: a boolean Tensor of shape [batch_size]
        initial_elements_finished = self.all_elems_non_finished(batch_size=batch_size)
        initial_input = eos_token
        initial_cell_state = encoder_final_state
        initial_cell_output = None
        # give it the shape that we want, but how exactly?:
        # initial_cell_output = tf.Variable(np.zeros(shape=(batch_size, self.TARGET_FEATURE_LEN)), dtype=tf.float32)
        initial_loop_state = None  # we don't need to pass any additional information
        return (initial_elements_finished,
                initial_input,
                initial_cell_state,
                initial_cell_output,
                initial_loop_state)

    def loop_fn_transition(self, time, previous_cell_output, previous_cell_state, batch_size, WW, bb, static_seq_len):
        """note that the matrix WW is shared among all outputs"""
        # print "previous cell output!"
        # print previous_cell_output
        print "time: {}".format(time)
        # finished = self.all_elems_finished(batch_size=batch_size,
        #                                    finished=time - 1 >= static_seq_len)  # (time >= decoder_lengths)
        finished = time >= self.get_seq_len_tensor(batch_size=batch_size, static_seq_len=static_seq_len)
        # this operation produces a boolean tensor of shape [batch_size]
        # defining whether the corresponding sequence has ended;
        # it is always False in our case, so the next two lines stay commented out
        # finished = tf.reduce_all(elements_finished)  # -> boolean scalar
        # input = tf.cond(finished, lambda: pad_step_embedded, get_next_input)
        next_input = tf.add(tf.matmul(previous_cell_output, WW), bb)
        # print "next input!"
        # print next_input
        next_cell_state = previous_cell_state
        # emit_output = tf.identity(next_input),
        emit_output = previous_cell_output
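        # we emit the raw cell output here; the projection through WW/bb happens twice:
        # once above to build next_input, and once more in the readout layer outside raw_rnn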
        next_loop_state = None  # we don't need to pass any additional information
        return (finished,
                next_input,
                next_cell_state,
                emit_output,
                next_loop_state)

    @staticmethod
    def get_seq_len_tensor(batch_size, static_seq_len):
        return tf.constant(np.full(shape=(batch_size,), fill_value=static_seq_len), dtype=tf.int32)

    @staticmethod
    def all_elems_non_finished(batch_size, finished=False):
        # print "finished"
        # print finished
        # return tf.constant(np.repeat(finished, batch_size), dtype=tf.bool)  # (0 >= decoder_lengths)
        return tf.constant(np.full(shape=(batch_size,), fill_value=finished, dtype=np.bool),
                           dtype=tf.bool)  # (0 >= decoder_lengths)
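

# --- Usage sketch: a minimal example of how this class is meant to be driven, assuming
# a hypothetical npz dataset path; every hyperparameter value below is an illustrative
# assumption, not taken from the gist.
if __name__ == '__main__':
    model = PriceHistorySeq2Seq(rng=np.random.RandomState(42), dtype=tf.float32,
                                config=tf.ConfigProto())
    # run() returns (stats, predictions_dict) because preds_gather_enabled defaults to True;
    # batch_size must evenly divide the dataset length (see the assert in getPredictions)
    dyn_stats, preds_dict = model.run(npz_path='data/price_history.npz',  # hypothetical path
                                      epochs=10, batch_size=50, num_units=400,
                                      input_len=60, target_len=30)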
# These two classes implement the same functionality in two different ways.
# They have not been thoroughly tested for exact equivalence, but they produce similar results.
# Unfortunately, using the native seq2seq module does not yield any speed improvement,
# so for now the choice between the two is purely a matter of style.
################################ Seq2Seq Native ##############################################
from __future__ import division
import numpy as np
import tensorflow as tf
from cost_functions.huber_loss import huberLoss
from data_providers.price_history_seq2seq_data_provider import PriceHistorySeq2SeqDataProvider
from mylibs.jupyter_notebook_helper import DynStats, getRunTime
# from model_selection.cross_validator import CrossValidator
from interfaces.neural_net_model_interface import NeuralNetModelInterface
from tensorflow.contrib import rnn
from collections import OrderedDict
from mylibs.py_helper import merge_dicts
from mylibs.tf_helper import tfMSE
import tensorflow.contrib.seq2seq as seq2seq


class PriceHistorySeq2SeqNative(NeuralNetModelInterface):
    FEATURE_LEN = 1
    TARGET_FEATURE_LEN = 1

    def __init__(self, rng, dtype, config, debug=False):
        # super(ShiftFunc, self).__init__(random_state=rng, data_provider_class=ProductPairsDataProvider)
        super(PriceHistorySeq2SeqNative, self).__init__()
        # with a bidirectional encoder, the decoder state size should be
        # 2x the encoder state size
        self.rng = rng
        self.dtype = dtype
        self.config = config
        self.train_data = None
        self.valid_data = None
        self.init = None
        self.error = None
        self.inputs = None
        self.predictions = None
        self.targets = None
        self.train_step = None
        self.debug = debug
        # an end-of-sequence token is necessary for machine translation models; not sure it is needed for our model
        self.EOS_TOKEN_LEN = 1

    class COST_FUNCS(object):
        HUBER_LOSS = "huberloss"
        MSE = 'mse'

    class RNN_CELLS(object):
        BASIC_RNN = tf.contrib.rnn.BasicRNNCell  # "basic_rnn"
        GRU = tf.contrib.rnn.GRUCell  # "gru"

    def run(self, npz_path, epochs, batch_size, num_units, input_len, target_len,
            eos_token=PriceHistorySeq2SeqDataProvider.EOS_TOKEN_DEFAULT,
            preds_gather_enabled=True,
            cost_func=COST_FUNCS.HUBER_LOSS, rnn_cell=RNN_CELLS.BASIC_RNN):
        graph = self.getGraph(batch_size=batch_size, verbose=False, num_units=num_units, input_len=input_len,
                              rnn_cell=rnn_cell, target_len=target_len, cost_func=cost_func, eos_token=eos_token)
        train_data = PriceHistorySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size, rng=self.rng,
                                                     eos_token=eos_token)
        preds_dp = PriceHistorySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size, rng=self.rng,
                                                   shuffle_order=False, eos_token=eos_token,
                                                   ) if preds_gather_enabled else None
        stats = self.train_validate(train_data=train_data, valid_data=None, graph=graph, epochs=epochs,
                                    preds_gather_enabled=preds_gather_enabled, preds_dp=preds_dp,
                                    batch_size=batch_size)
        return stats

    def train_validate(self, train_data, valid_data, **kwargs):
        graph = kwargs['graph']
        epochs = kwargs['epochs']
        batch_size = kwargs['batch_size']
        verbose = kwargs.get('verbose', True)
        preds_dp = kwargs.get('preds_dp', None)
        preds_gather_enabled = kwargs.get('preds_gather_enabled', True)
        if verbose:
            print "epochs: %d" % epochs
        with tf.Session(graph=graph, config=self.config) as sess:
            sess.run(self.init)  # sess.run(tf.initialize_all_variables())
            dynStats = DynStats()
            for epoch in range(epochs):
                train_error, runTime = getRunTime(
                    lambda:
                    self.trainEpoch(
                        sess=sess,
                        data_provider=train_data,
                    )
                )
                # if (epoch + 1) % 1 == 0:
                #     valid_error = validateEpoch(
                #         inputs=self.inputs,
                #         targets=self.targets,
                #         sess=sess,
                #         valid_data=valid_data,
                #         error=self.error,
                #         extraFeedDict={self.is_training: False},
                #         # keep_prob_keys=[self.keep_prob_input, self.keep_prob_hidden]
                #     )
                if verbose:
                    # print 'EndEpoch%02d(%.3f secs):err(train)=%.4f,acc(train)=%.2f,err(valid)=%.2f,acc(valid)=%.2f, ' % \
                    #     (epoch + 1, runTime, train_error, train_accuracy, valid_error, valid_accuracy)
                    print 'End Epoch %02d (%.3f secs): err(train) = %.4f' % (epoch + 1, runTime, train_error)
                dynStats.gatherStats(train_error)
            predictions_dict = self.getPredictions(batch_size=batch_size, data_provider=preds_dp, sess=sess)[
                0] if preds_gather_enabled else None
            if preds_gather_enabled:
                return dynStats, predictions_dict
            else:
                return dynStats

    def getPredictions(self, sess, data_provider, batch_size, extraFeedDict=None):
        if extraFeedDict is None:
            extraFeedDict = {}
        assert data_provider.data_len % batch_size == 0
        total_error = 0.
        instances_order = data_provider.current_order
        target_len = data_provider.targets.shape[1]
        all_predictions = np.zeros(shape=(data_provider.data_len, target_len))
        for inst_ind, (input_batch, target_batch) in enumerate(data_provider):
            cur_error, cur_preds = sess.run(
                [self.error, self.predictions],
                feed_dict=merge_dicts({self.inputs: input_batch,
                                       self.targets: target_batch,
                                       }, extraFeedDict))
            assert np.all(instances_order == data_provider.current_order)
            all_predictions[inst_ind * batch_size: (inst_ind + 1) * batch_size, :] = cur_preds[:, :-1]
            total_error += cur_error
        total_error /= data_provider.num_batches
        assert np.all(all_predictions != 0)  # all predictions are expected to be something other than exactly zero
        preds_dict = OrderedDict(zip(instances_order, all_predictions))
        return preds_dict, total_error

    def trainEpoch(self, sess, data_provider, extraFeedDict=None):
        if extraFeedDict is None:
            extraFeedDict = {}
        train_error = 0.
        num_batches = data_provider.num_batches
        for step, (input_batch, target_batch) in enumerate(data_provider):
            feed_dic = merge_dicts({self.inputs: input_batch,
                                    self.targets: target_batch,
                                    }, extraFeedDict)
            _, batch_error = sess.run([self.train_step, self.error], feed_dict=feed_dic)
            train_error += batch_error
        train_error /= num_batches
        return train_error

    def getGraph(self,
                 # TODO in this version we build the full length; later we will improve it (truncated backprop length)
                 batch_size,
                 num_units,
                 input_len,
                 target_len,
                 eos_token=PriceHistorySeq2SeqDataProvider.EOS_TOKEN_DEFAULT,
                 rnn_cell=RNN_CELLS.BASIC_RNN,
                 cost_func=COST_FUNCS.HUBER_LOSS,
                 learningRate=1e-3,  # default of Adam is 1e-3
                 # lamda2=1e-2,
                 verbose=True):
        # momentum = 0.5
        # tf.reset_default_graph()  # kind of a redundant statement
        if verbose:
            # print "lamda2: %f" % lamda2
            print "learning rate: %f" % learningRate
        output_seq_len = target_len + self.EOS_TOKEN_LEN
        graph = tf.Graph()  # create new graph
        with graph.as_default():
            with tf.name_scope('data'):
                if self.debug:
                    inputs, targets = self.get_debug_data(batch_size=batch_size, input_len=input_len,
                                                          output_seq_len=output_seq_len)
                else:
                    inputs = tf.placeholder(dtype=self.dtype,
                                            shape=(batch_size, input_len, self.FEATURE_LEN), name="inputs")
                    targets = tf.placeholder(dtype=self.dtype, shape=(batch_size, output_seq_len), name="targets")
            with tf.name_scope('inputs'):
                # unstack the matrix into a list of tensors, one per time step
                inputs_series = tf.unstack(inputs, axis=1)
                if verbose:
                    print len(inputs_series)
                    print inputs_series[0]  # shape: (batch_size, FEATURE_LEN)
            with tf.name_scope('encoder_rnn_layer'):
                # with tf.variable_scope("Encoder") as scope:
                #     (self.encoder_outputs, self.encoder_state) = (
                #         tf.nn.dynamic_rnn(cell=self.encoder_cell,
                #                           inputs=self.encoder_inputs_embedded,
                #                           sequence_length=self.encoder_inputs_length,
                #                           time_major=True,
                #                           dtype=tf.float32)
                #     )
                # we do not really care about the encoder outputs, only its final state;
                # the encoder consumes all the input to get a sense of the trend of the price history
                _, encoder_final_state = rnn.static_rnn(cell=rnn_cell(num_units=num_units),
                                                        inputs=inputs_series,
                                                        initial_state=None,
                                                        # TODO when using truncated backprop this should not be zero
                                                        dtype=self.dtype)
                if verbose:
                    print encoder_final_state
            with tf.variable_scope('decoder_rnn_layer'):
                # Potentially useful classes / functions from the seq2seq module of TF r1.1 onwards:
                # class BasicDecoder: basic sampling decoder
                # class BasicDecoderOutput
                # class Decoder: an RNN decoder abstract interface object
                # class Helper: interface for implementing sampling in seq2seq decoders <-- interface only, no implementation
                # dynamic_decode(...): perform dynamic decoding with a decoder
                # TrainingHelper: a helper for use during training; only reads inputs <-- this requires decoder inputs
                # helper = seq2seq.TrainingHelper()
                # # TODO probably good to use it with the dummy version of the model where we provide the decoder inputs
                # EOS_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.EOS
                eos_token_tensor = tf.constant(np.ones(shape=(batch_size, 1)) * eos_token,
                                               dtype=tf.float32, name='eos_token_tensor')
                WW = tf.Variable(self.rng.randn(num_units, self.TARGET_FEATURE_LEN), dtype=self.dtype, name='weights')
                bb = tf.Variable(np.zeros(self.TARGET_FEATURE_LEN), dtype=self.dtype, name='bias')
                helper = seq2seq.CustomHelper(
                    initialize_fn=self.get_initialize_fn(batch_size=batch_size, eos_token_tensor=eos_token_tensor),
                    sample_fn=self.get_sample_fn(batch_size=batch_size),
                    next_inputs_fn=self.get_next_inputs_fn(batch_size=batch_size, static_seq_len=output_seq_len,
                                                           WW=WW, bb=bb)
                )
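                # CustomHelper mirrors the phases of a raw_rnn loop_fn: initialize_fn handles
                # time 0 (feed the EOS token), sample_fn is a no-op here (regression, not
                # classification), and next_inputs_fn feeds each projected output back in
                # as the next decoder input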
                decoder = seq2seq.BasicDecoder(cell=rnn_cell(num_units=num_units),
                                               helper=helper,
                                               initial_state=encoder_final_state)
                # https://stackoverflow.com/questions/44483159/how-to-use-tensorflow-v1-1-seq2seq-dynamic-decode
                dyn_decode_outs = seq2seq.dynamic_decode(decoder=decoder,
                                                         output_time_major=False,
                                                         # we have batches as our major dimension
                                                         impute_finished=False,
                                                         # we don't really care, because it only affects dynamic sequences
                                                         )
                basic_decoder_output, _ = dyn_decode_outs
                if verbose:
                    print basic_decoder_output
                decoder_out_tensor = basic_decoder_output.rnn_output
                # del final_state, final_sequence_lengths  # not interesting <-- the doc has it wrong; there is no final_sequence_lengths output
                if verbose:
                    print decoder_out_tensor
            with tf.name_scope('readout_layer'):
                # just grab the dimensions
                # decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(tf.shape(decoder_out_tensor))
                decoder_dim = decoder_out_tensor.get_shape()[-1].value  # last dimension's value
                decoder_outputs_flat = tf.reshape(decoder_out_tensor, (-1, decoder_dim))
                # TODO note that we reuse exactly the same weights used above inside the RNN loop (not sure this is best)
                readouts = tf.add(tf.matmul(decoder_outputs_flat, WW), bb, "readouts")
                if verbose:
                    print readouts
            with tf.name_scope('predictions'):
                # here batch size goes first (we omit the target feature len because it is 1),
                # since dynamic_decode with output_time_major=False is batch-major, so no transpose is needed
                predictions = tf.reshape(readouts, shape=(batch_size, output_seq_len))
                # decoder_logits = tf.reshape(decoder_logits_flat, (decoder_max_steps, decoder_batch_size, vocab_size))
                # https://github.com/ematvey/tensorflow-seq2seq-tutorials/blob/master/2-seq2seq-advanced.ipynb
                if verbose:
                    print predictions
            with tf.name_scope('error'):
                if cost_func == self.COST_FUNCS.HUBER_LOSS:
                    losses = huberLoss(y_true=targets, y_pred=predictions)  # both have shape: (batch_size, output_seq_len)
                elif cost_func == self.COST_FUNCS.MSE:
                    losses = tfMSE(outputs=predictions, targets=targets)
                else:
                    raise Exception("invalid or unsupported cost function")
                if verbose:
                    print losses
                # fix the error to exclude the EOS position from the error calculation
                mask = np.ones(shape=losses.get_shape())
                mask[:, -1:] = 0
                losses = losses * tf.constant(mask, dtype=tf.float32)
                error = tf.reduce_mean(losses)
                if verbose:
                    print error
            with tf.name_scope('training_step'):
                train_step = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(error)
            init = tf.global_variables_initializer()
        self.init = init
        self.inputs = inputs
        self.targets = targets
        self.error = error
        # self.is_training = is_training
        self.train_step = train_step
        self.predictions = predictions
        # self.keep_prob_input = keep_prob_input
        # self.keep_prob_hidden = keep_prob_hidden
        return graph

    def get_initialize_fn(self, batch_size, eos_token_tensor):
        def initialize_fn():
            """callable that returns (finished, next_inputs) for the first iteration"""
            finished = self.all_elems_non_finished(batch_size=batch_size)
            next_inputs = eos_token_tensor
            return (finished,
                    next_inputs)

        return initialize_fn

    def get_sample_fn(self, batch_size):
        def sample_fn(time, outputs, state):
            """callable that takes (time, outputs, state) and emits tensor sample_ids"""
            # in the translation case the output is reduced to a number of logits (which can be many if the
            # vocabulary is large), and only one of them is the maximum, so that one can be considered
            # the sample id; here we have no such case, therefore we return an invalid id index: -1
            return -1 * tf.ones(shape=(batch_size,),
                                dtype=self.dtype)  # we do not actually care about sampling here; this is not classification

        return sample_fn

    def get_next_inputs_fn(self, batch_size, static_seq_len, WW, bb):
        def next_inputs_fn(time, outputs, state, sample_ids):
            """callable that takes (time, outputs, state, sample_ids) and emits (finished, next_inputs, next_state)"""
            finished = self.get_seq_len_tensor(batch_size=batch_size, static_seq_len=static_seq_len) <= time + 1
            next_inputs = tf.add(tf.matmul(outputs, WW), bb)
            next_state = state
            return (finished, next_inputs, next_state)

        return next_inputs_fn

    @staticmethod
    def get_seq_len_tensor(batch_size, static_seq_len):
        return tf.constant(np.full(shape=(batch_size,), fill_value=static_seq_len), dtype=tf.int32)

    @staticmethod
    def all_elems_non_finished(batch_size, finished=False):
        # print "finished"
        # print finished
        # return tf.constant(np.repeat(finished, batch_size), dtype=tf.bool)  # (0 >= decoder_lengths)
        return tf.constant(np.full(shape=(batch_size,), fill_value=finished, dtype=np.bool),
                           dtype=tf.bool)  # (0 >= decoder_lengths)

    def get_debug_data(self, batch_size, input_len, output_seq_len):
        # note: ndarray.reshape takes the new shape positionally, not as a `shape=` keyword
        inputs = tf.constant(value=np.arange(batch_size * input_len * self.FEATURE_LEN).reshape(
            (batch_size, input_len, self.FEATURE_LEN)), dtype=self.dtype, name="inputs")
        targets = tf.constant(value=np.arange(batch_size * output_seq_len).reshape((batch_size, output_seq_len)),
                              dtype=self.dtype, name="targets")
        return inputs, targets
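

# --- Usage sketch: the same hypothetical driving code as for the raw-RNN version, with
# this class swapped in; the npz path and all hyperparameters are illustrative assumptions.
if __name__ == '__main__':
    model = PriceHistorySeq2SeqNative(rng=np.random.RandomState(42), dtype=tf.float32,
                                      config=tf.ConfigProto(), debug=False)
    dyn_stats, preds_dict = model.run(npz_path='data/price_history.npz',  # hypothetical path
                                      epochs=10, batch_size=50, num_units=400,
                                      input_len=60, target_len=30)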
# for naming purposes alone
# learn more here: https://stackoverflow.com/questions/19896900/how-to-change-the-name-of-a-gist-in-github