Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save pligor/00f2629022dbef346b28c33d547ab026 to your computer and use it in GitHub Desktop.
Save pligor/00f2629022dbef346b28c33d547ab026 to your computer and use it in GitHub Desktop.
Sequence to Sequence Prediction using Raw RNN versus using native Seq2Seq module of tensorflow
#These two classes implement the same functionality in two different ways
#Not thoroughly tested if they are identical but they produce similar results
#Unfortunately the usage of the native seq2seq module does not yield any increase in speed performance
#So for now it is only a matter of style which one you will choose
################################Seq2Seq with Raw RNN##############################################3
from __future__ import division
import numpy as np
import tensorflow as tf
from cost_functions.huber_loss import huberLoss
from data_providers.price_history_seq2seq_data_provider import PriceHistorySeq2SeqDataProvider
from mylibs.jupyter_notebook_helper import DynStats, getRunTime
# from model_selection.cross_validator import CrossValidator
from interfaces.neural_net_model_interface import NeuralNetModelInterface
from tensorflow.contrib import rnn
from collections import OrderedDict
from mylibs.py_helper import merge_dicts
from mylibs.tf_helper import tfMSE
class PriceHistorySeq2Seq(NeuralNetModelInterface):
FEATURE_LEN = 1
TARGET_FEATURE_LEN = 1
def __init__(self, rng, dtype, config):
# super(ShiftFunc, self).__init__(random_state=rng, data_provider_class=ProductPairsDataProvider)
super(PriceHistorySeq2Seq, self).__init__()
self.rng = rng
self.dtype = dtype
self.config = config
self.train_data = None
self.valid_data = None
self.init = None
self.error = None
self.inputs = None
self.predictions = None
self.targets = None
self.train_step = None
# end of sequence token length is necessary for machine translation models, not sure for our model
self.EOS_TOKEN_LEN = 1
class COST_FUNCS(object):
HUBER_LOSS = "huberloss"
MSE = 'mse'
class RNN_CELLS(object):
BASIC_RNN = tf.contrib.rnn.BasicRNNCell # "basic_rnn"
GRU = tf.contrib.rnn.GRUCell # "gru"
def run(self, npz_path, epochs, batch_size, num_units, input_len, target_len,
eos_token = PriceHistorySeq2SeqDataProvider.EOS_TOKEN_DEFAULT,
preds_gather_enabled=True,
cost_func=COST_FUNCS.HUBER_LOSS, rnn_cell=RNN_CELLS.BASIC_RNN):
graph = self.getGraph(batch_size=batch_size, verbose=False, num_units=num_units, input_len=input_len,
rnn_cell=rnn_cell, target_len=target_len, cost_func=cost_func, eos_token=eos_token)
train_data = PriceHistorySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size, rng=self.rng, eos_token=eos_token)
preds_dp = PriceHistorySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size, rng=self.rng,
shuffle_order=False, eos_token=eos_token,
) if preds_gather_enabled else None
stats = self.train_validate(train_data=train_data, valid_data=None, graph=graph, epochs=epochs,
preds_gather_enabled=preds_gather_enabled, preds_dp=preds_dp, batch_size=batch_size)
return stats
def train_validate(self, train_data, valid_data, **kwargs):
graph = kwargs['graph']
epochs = kwargs['epochs']
batch_size = kwargs['batch_size']
verbose = kwargs['verbose'] if 'verbose' in kwargs.keys() else True
preds_dp = kwargs['preds_dp'] if 'preds_dp' in kwargs.keys() else None
preds_gather_enabled = kwargs['preds_gather_enabled'] if 'preds_gather_enabled' in kwargs.keys() else True
if verbose:
print "epochs: %d" % epochs
with tf.Session(graph=graph, config=self.config) as sess:
sess.run(self.init) # sess.run(tf.initialize_all_variables())
dynStats = DynStats()
for epoch in range(epochs):
train_error, runTime = getRunTime(
lambda:
self.trainEpoch(
sess=sess,
data_provider=train_data,
)
)
# if (epoch + 1) % 1 == 0:
# valid_error = validateEpoch(
# inputs=self.inputs,
# targets=self.targets,
# sess=sess,
# valid_data=valid_data,
# error=self.error,
# extraFeedDict={self.is_training: False},
# # keep_prob_keys=[self.keep_prob_input, self.keep_prob_hidden]
# )
if verbose:
# print 'EndEpoch%02d(%.3f secs):err(train)=%.4f,acc(train)=%.2f,err(valid)=%.2f,acc(valid)=%.2f, ' % \
# (epoch + 1, runTime, train_error, train_accuracy, valid_error, valid_accuracy)
print 'End Epoch %02d (%.3f secs): err(train) = %.4f' % (epoch + 1, runTime, train_error)
dynStats.gatherStats(train_error)
predictions_dict = self.getPredictions(batch_size=batch_size, data_provider=preds_dp, sess=sess)[
0] if preds_gather_enabled else None
if verbose:
print
if preds_gather_enabled:
return dynStats, predictions_dict
else:
return dynStats
def getPredictions(self, sess, data_provider, batch_size, extraFeedDict=None):
if extraFeedDict is None:
extraFeedDict = {}
assert data_provider.data_len % batch_size == 0
total_error = 0.
instances_order = data_provider.current_order
target_len = data_provider.targets.shape[1]
all_predictions = np.zeros(shape=(data_provider.data_len, target_len))
for inst_ind, (input_batch, target_batch) in enumerate(data_provider):
cur_error, cur_preds = sess.run(
[self.error, self.predictions],
feed_dict=merge_dicts({self.inputs: input_batch,
self.targets: target_batch,
}, extraFeedDict))
assert np.all(instances_order == data_provider.current_order)
all_predictions[inst_ind * batch_size: (inst_ind + 1) * batch_size, :] = cur_preds[:, :-1]
total_error += cur_error
total_error /= data_provider.num_batches
assert np.all(all_predictions != 0) # all predictions are expected to be something else than absolute zero
preds_dict = OrderedDict(zip(instances_order, all_predictions))
return preds_dict, total_error
def trainEpoch(self, sess, data_provider, extraFeedDict=None):
if extraFeedDict is None:
extraFeedDict = {}
train_error = 0.
num_batches = data_provider.num_batches
for step, (input_batch, target_batch) in enumerate(data_provider):
feed_dic = merge_dicts({self.inputs: input_batch,
self.targets: target_batch,
}, extraFeedDict)
_, batch_error = sess.run([self.train_step, self.error], feed_dict=feed_dic)
train_error += batch_error
train_error /= num_batches
return train_error
def getGraph(self,
# TODO in this version we are building it full length and then we are going to improve it (trunc backprop len)
batch_size,
num_units,
input_len,
target_len,
eos_token = PriceHistorySeq2SeqDataProvider.EOS_TOKEN_DEFAULT,
rnn_cell=RNN_CELLS.BASIC_RNN,
cost_func=COST_FUNCS.HUBER_LOSS,
learningRate=1e-3, # default of Adam is 1e-3
# lamda2=1e-2,
verbose=True):
# momentum = 0.5
# tf.reset_default_graph() #kind of redundant statement
if verbose:
# print "lamda2: %f" % lamda2
print "learning rate: %f" % learningRate
output_seq_len = target_len + self.EOS_TOKEN_LEN
graph = tf.Graph() # create new graph
with graph.as_default():
with tf.name_scope('data'):
inputs = tf.placeholder(dtype=self.dtype,
shape=(batch_size, input_len, self.FEATURE_LEN), name="inputs")
targets = tf.placeholder(dtype=self.dtype, shape=(batch_size, output_seq_len), name="targets")
# temporary to make it easy for ourselves the first time
# decoder_inputs = tf.placeholder(dtype=self.dtype,
# shape=(batch_size, output_seq_len,
# self.TARGET_FEATURE_LEN))
with tf.name_scope('inputs'):
# unpack matrix into 1 dim array
inputs_series = tf.unstack(inputs, axis=1)
if verbose:
print len(inputs_series)
print inputs_series[0] # shape: (5,)
print
with tf.name_scope('encoder_rnn_layer'):
# don't really care for encoder outputs, but only for its final state
# the encoder consumes all the input to get a sense of the trend of price history
_, encoder_final_state = rnn.static_rnn(cell=rnn_cell(num_units=num_units),
inputs=inputs_series,
initial_state=None,
# TODO when using trunc backprop this should not be zero
dtype=self.dtype)
if verbose:
print encoder_final_state
print
# BAD idea
# decoder_outputs = []
# inout = [eos_token]
# for ii in range(output_seq_len):
# if verbose:
# print "rnn: {}".format(ii)
# with tf.variable_scope('decoder_rnn_layer_{}'.format(ii)):
# # note that we use the same number of units for decoder here
# inout, _ = rnn.static_rnn(cell=rnn_cell(num_units=num_units),
# inputs=inout,
# initial_state=encoder_final_state,
# dtype=self.dtype)
# decoder_outputs += inout
with tf.variable_scope('decoder_rnn_layer'):
eos_token_tensor = tf.constant(np.ones(shape=(batch_size, 1)) * eos_token,
dtype=tf.float32, name='eos_token')
WW = tf.Variable(self.rng.randn(num_units, self.TARGET_FEATURE_LEN), dtype=self.dtype, name='weights')
bb = tf.Variable(np.zeros(self.TARGET_FEATURE_LEN), dtype=self.dtype, name='bias')
decoder_output_tensor_array, _, _ = tf.nn.raw_rnn(cell=rnn_cell(num_units=num_units),
loop_fn=self.get_loop_fn(
encoder_final_state=encoder_final_state,
eos_token=eos_token_tensor, batch_size=batch_size,
WW=WW, bb=bb, static_seq_len=output_seq_len,
))
# del decoder_final_state, decoder_final_loop_state #not interesting
if verbose:
# print len(decoder_outputs)
# print decoder_outputs[0]
print decoder_output_tensor_array
print
# https://www.tensorflow.org/api_docs/python/tf/TensorArray
decoder_out_tensor = decoder_output_tensor_array.stack(name='decoder_out_tensor')
if verbose:
print decoder_out_tensor
print
with tf.name_scope('readout_layer'):
# just grab the dimensions
# decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(tf.shape(decoder_out_tensor))
decoder_dim = decoder_out_tensor.get_shape()[-1].value # last dimension's value
decoder_outputs_flat = tf.reshape(decoder_out_tensor, (-1, decoder_dim))
# TODO note that we are using exactly the same that we used above inside RNN cell (not sure if this is the best)
readouts = tf.add(tf.matmul(decoder_outputs_flat, WW), bb, "readouts")
if verbose:
print readouts
print
with tf.name_scope('predictions'):
# in purpose batch size goes last and output_seq_len goes second (we omit the target feature len because it is 1)
# because raw rnn stack things on top of each other and puts sequence length first
predictions_transposed = tf.reshape(readouts, shape=(output_seq_len, batch_size))
predictions = tf.transpose(predictions_transposed)
# decoder_logits = tf.reshape(decoder_logits_flat, (decoder_max_steps, decoder_batch_size, vocab_size))
# https://github.com/ematvey/tensorflow-seq2seq-tutorials/blob/master/2-seq2seq-advanced.ipynb
if verbose:
print predictions_transposed
print predictions
print
with tf.name_scope('error'):
if cost_func == self.COST_FUNCS.HUBER_LOSS:
losses = huberLoss(y_true=targets, y_pred=predictions) # both have shape: (batch_size, target_len)
elif cost_func == self.COST_FUNCS.MSE:
losses = tfMSE(outputs=predictions, targets=targets)
else:
raise Exception("invalid or non supported cost function")
if verbose:
print losses
print
# fix error to exclude the EOS from the error calculation
mask = np.ones(shape=losses.get_shape())
mask[:, -1:] = 0
losses = losses * tf.constant(mask, dtype=tf.float32)
error = tf.reduce_mean(losses)
if verbose:
print error
print
with tf.name_scope('training_step'):
train_step = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(error)
init = tf.global_variables_initializer()
self.init = init
self.inputs = inputs
self.targets = targets
self.error = error
# self.is_training = is_training
self.train_step = train_step
self.predictions = predictions
# self.keep_prob_input = keep_prob_input
# self.keep_prob_hidden = keep_prob_hidden
return graph
def get_loop_fn(self, encoder_final_state, eos_token, batch_size, WW, bb, static_seq_len):
def loop_fn(time, previous_output, previous_state, previous_loop_state):
# inputs: time, previous_cell_output, previous_cell_state, previous_loop_state
# outputs: elements_finished, input, cell_state, output, loop_state
if previous_state is None: # time == 0
assert previous_output is None
return self.loop_fn_initial(encoder_final_state=encoder_final_state, eos_token=eos_token,
batch_size=batch_size)
else:
return self.loop_fn_transition(time=time, previous_cell_output=previous_output,
previous_cell_state=previous_state,
batch_size=batch_size, WW=WW, bb=bb, static_seq_len=static_seq_len)
return loop_fn
def loop_fn_initial(self, encoder_final_state, eos_token, batch_size):
# https://www.tensorflow.org/api_docs/python/tf/nn/raw_rnn
# here we have a static rnn case so all length so be taken into account,
# so I guess they are all False always
# From documentation: a boolean Tensor of shape [batch_size]
initial_elements_finished = self.all_elems_non_finished(batch_size=batch_size)
initial_input = eos_token
initial_cell_state = encoder_final_state
initial_cell_output = None
# give it the shape that we want but how exactly ???:
# initial_cell_output = tf.Variable(np.zeros(shape=(batch_size, self.TARGET_FEATURE_LEN)), dtype=tf.float32)
initial_loop_state = None # we don't need to pass any additional information
return (initial_elements_finished,
initial_input,
initial_cell_state,
initial_cell_output,
initial_loop_state)
def loop_fn_transition(self, time, previous_cell_output, previous_cell_state, batch_size, WW, bb, static_seq_len):
"""note that the matrix W is going to be shared among outputs"""
# print "previous cell output!"
# print previous_cell_output
# print
print "time: {}".format(time)
print
# finished = self.all_elems_finished(batch_size=batch_size,
# finished=time - 1 >= static_seq_len) # (time >= decoder_lengths)
# finished = self.all_elems_finished(batch_size=batch_size,
# finished=time - 1 >= static_seq_len) # (time >= decoder_lengths)
finished = time >= self.get_seq_len_tensor(batch_size=batch_size, static_seq_len=static_seq_len)
# this operation produces boolean tensor of [batch_size] defining if corresponding sequence has ended
# this is always false in our case so just comment next two lines
# finished = tf.reduce_all(elements_finished) # -> boolean scalar
# input = tf.cond(finished, lambda: pad_step_embedded, get_next_input)
next_input = tf.add(tf.matmul(previous_cell_output, WW), bb)
# print "next input!"
# print next_input
# print
next_cell_state = previous_cell_state
# emit_output = tf.identity(next_input),
emit_output = previous_cell_output
next_loop_state = None # we don't need to pass any additional information
return (finished,
next_input,
next_cell_state,
emit_output,
next_loop_state)
@staticmethod
def get_seq_len_tensor(batch_size, static_seq_len):
return tf.constant(np.full(shape=(batch_size,), fill_value=static_seq_len), dtype=tf.int32)
@staticmethod
def all_elems_non_finished(batch_size, finished=False):
# print "finished"
# print finished
# return tf.constant(np.repeat(finished, batch_size), dtype=tf.bool) # (0 >= decoder_lengths)
return tf.constant(np.full(shape=(batch_size,), fill_value=finished, dtype=np.bool),
dtype=tf.bool) # (0 >= decoder_lengths)
#These two classes implement the same functionality in two different ways
#Not thoroughly tested if they are identical but they produce similar results
#Unfortunately the usage of the native seq2seq module does not yield any increase in speed performance
#So for now it is only a matter of style which one you will choose
################################Seq2Seq Native##############################################
from __future__ import division
import numpy as np
import tensorflow as tf
from cost_functions.huber_loss import huberLoss
from data_providers.price_history_seq2seq_data_provider import PriceHistorySeq2SeqDataProvider
from mylibs.jupyter_notebook_helper import DynStats, getRunTime
# from model_selection.cross_validator import CrossValidator
from interfaces.neural_net_model_interface import NeuralNetModelInterface
from tensorflow.contrib import rnn
from collections import OrderedDict
from mylibs.py_helper import merge_dicts
from mylibs.tf_helper import tfMSE
import tensorflow.contrib.seq2seq as seq2seq
class PriceHistorySeq2SeqNative(NeuralNetModelInterface):
FEATURE_LEN = 1
TARGET_FEATURE_LEN = 1
def __init__(self, rng, dtype, config, debug=False):
# super(ShiftFunc, self).__init__(random_state=rng, data_provider_class=ProductPairsDataProvider)
super(PriceHistorySeq2SeqNative, self).__init__()
# with bidirectional encoder, decoder state size should be
# 2x encoder state size
self.rng = rng
self.dtype = dtype
self.config = config
self.train_data = None
self.valid_data = None
self.init = None
self.error = None
self.inputs = None
self.predictions = None
self.targets = None
self.train_step = None
self.debug = debug
# end of sequence token length is necessary for machine translation models, not sure for our model
self.EOS_TOKEN_LEN = 1
class COST_FUNCS(object):
HUBER_LOSS = "huberloss"
MSE = 'mse'
class RNN_CELLS(object):
BASIC_RNN = tf.contrib.rnn.BasicRNNCell # "basic_rnn"
GRU = tf.contrib.rnn.GRUCell # "gru"
def run(self, npz_path, epochs, batch_size, num_units, input_len, target_len,
eos_token=PriceHistorySeq2SeqDataProvider.EOS_TOKEN_DEFAULT,
preds_gather_enabled=True,
cost_func=COST_FUNCS.HUBER_LOSS, rnn_cell=RNN_CELLS.BASIC_RNN):
graph = self.getGraph(batch_size=batch_size, verbose=False, num_units=num_units, input_len=input_len,
rnn_cell=rnn_cell, target_len=target_len, cost_func=cost_func, eos_token=eos_token)
train_data = PriceHistorySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size, rng=self.rng,
eos_token=eos_token)
preds_dp = PriceHistorySeq2SeqDataProvider(npz_path=npz_path, batch_size=batch_size, rng=self.rng,
shuffle_order=False, eos_token=eos_token,
) if preds_gather_enabled else None
stats = self.train_validate(train_data=train_data, valid_data=None, graph=graph, epochs=epochs,
preds_gather_enabled=preds_gather_enabled, preds_dp=preds_dp, batch_size=batch_size)
return stats
def train_validate(self, train_data, valid_data, **kwargs):
graph = kwargs['graph']
epochs = kwargs['epochs']
batch_size = kwargs['batch_size']
verbose = kwargs['verbose'] if 'verbose' in kwargs.keys() else True
preds_dp = kwargs['preds_dp'] if 'preds_dp' in kwargs.keys() else None
preds_gather_enabled = kwargs['preds_gather_enabled'] if 'preds_gather_enabled' in kwargs.keys() else True
if verbose:
print "epochs: %d" % epochs
with tf.Session(graph=graph, config=self.config) as sess:
sess.run(self.init) # sess.run(tf.initialize_all_variables())
dynStats = DynStats()
for epoch in range(epochs):
train_error, runTime = getRunTime(
lambda:
self.trainEpoch(
sess=sess,
data_provider=train_data,
)
)
# if (epoch + 1) % 1 == 0:
# valid_error = validateEpoch(
# inputs=self.inputs,
# targets=self.targets,
# sess=sess,
# valid_data=valid_data,
# error=self.error,
# extraFeedDict={self.is_training: False},
# # keep_prob_keys=[self.keep_prob_input, self.keep_prob_hidden]
# )
if verbose:
# print 'EndEpoch%02d(%.3f secs):err(train)=%.4f,acc(train)=%.2f,err(valid)=%.2f,acc(valid)=%.2f, ' % \
# (epoch + 1, runTime, train_error, train_accuracy, valid_error, valid_accuracy)
print 'End Epoch %02d (%.3f secs): err(train) = %.4f' % (epoch + 1, runTime, train_error)
dynStats.gatherStats(train_error)
predictions_dict = self.getPredictions(batch_size=batch_size, data_provider=preds_dp, sess=sess)[
0] if preds_gather_enabled else None
if verbose:
print
if preds_gather_enabled:
return dynStats, predictions_dict
else:
return dynStats
def getPredictions(self, sess, data_provider, batch_size, extraFeedDict=None):
if extraFeedDict is None:
extraFeedDict = {}
assert data_provider.data_len % batch_size == 0
total_error = 0.
instances_order = data_provider.current_order
target_len = data_provider.targets.shape[1]
all_predictions = np.zeros(shape=(data_provider.data_len, target_len))
for inst_ind, (input_batch, target_batch) in enumerate(data_provider):
cur_error, cur_preds = sess.run(
[self.error, self.predictions],
feed_dict=merge_dicts({self.inputs: input_batch,
self.targets: target_batch,
}, extraFeedDict))
assert np.all(instances_order == data_provider.current_order)
all_predictions[inst_ind * batch_size: (inst_ind + 1) * batch_size, :] = cur_preds[:, :-1]
total_error += cur_error
total_error /= data_provider.num_batches
assert np.all(all_predictions != 0) # all predictions are expected to be something else than absolute zero
preds_dict = OrderedDict(zip(instances_order, all_predictions))
return preds_dict, total_error
def trainEpoch(self, sess, data_provider, extraFeedDict=None):
if extraFeedDict is None:
extraFeedDict = {}
train_error = 0.
num_batches = data_provider.num_batches
for step, (input_batch, target_batch) in enumerate(data_provider):
feed_dic = merge_dicts({self.inputs: input_batch,
self.targets: target_batch,
}, extraFeedDict)
_, batch_error = sess.run([self.train_step, self.error], feed_dict=feed_dic)
train_error += batch_error
train_error /= num_batches
return train_error
def getGraph(self,
# TODO in this version we are building it full length and then we are going to improve it (trunc backprop len)
batch_size,
num_units,
input_len,
target_len,
eos_token=PriceHistorySeq2SeqDataProvider.EOS_TOKEN_DEFAULT,
rnn_cell=RNN_CELLS.BASIC_RNN,
cost_func=COST_FUNCS.HUBER_LOSS,
learningRate=1e-3, # default of Adam is 1e-3
# lamda2=1e-2,
verbose=True):
# momentum = 0.5
# tf.reset_default_graph() #kind of redundant statement
if verbose:
# print "lamda2: %f" % lamda2
print "learning rate: %f" % learningRate
output_seq_len = target_len + self.EOS_TOKEN_LEN
graph = tf.Graph() # create new graph
with graph.as_default():
with tf.name_scope('data'):
if self.debug:
inputs, targets = self.get_debug_data(batch_size=batch_size, input_len=input_len,
output_seq_len=output_seq_len)
else:
inputs = tf.placeholder(dtype=self.dtype,
shape=(batch_size, input_len, self.FEATURE_LEN), name="inputs")
targets = tf.placeholder(dtype=self.dtype, shape=(batch_size, output_seq_len), name="targets")
with tf.name_scope('inputs'):
# unpack matrix into 1 dim array
inputs_series = tf.unstack(inputs, axis=1)
if verbose:
print len(inputs_series)
print inputs_series[0] # shape: (5,)
print
with tf.name_scope('encoder_rnn_layer'):
# with tf.variable_scope("Encoder") as scope:
# (self.encoder_outputs, self.encoder_state) = (
# tf.nn.dynamic_rnn(cell=self.encoder_cell,
# inputs=self.encoder_inputs_embedded,
# sequence_length=self.encoder_inputs_length,
# time_major=True,
# dtype=tf.float32)
# )
# don't really care for encoder outputs, but only for its final state
# the encoder consumes all the input to get a sense of the trend of price history
_, encoder_final_state = rnn.static_rnn(cell=rnn_cell(num_units=num_units),
inputs=inputs_series,
initial_state=None,
# TODO when using trunc backprop this should not be zero
dtype=self.dtype)
if verbose:
print encoder_final_state
print
with tf.variable_scope('decoder_rnn_layer'):
# Potentially useful classes / functions from seq2seq module of tf r.1.1 onwards
# class BasicDecoder: Basic sampling decoder.
# class BasicDecoderOutput
# class Decoder: An RNN Decoder abstract interface object.
# class Helper: Interface for implementing sampling in seq2seq decoders. <--- this is only interface, no implementation
# dynamic_decode(...): Perform dynamic decoding with decoder.
# Training Helper: A helper for use during training. Only reads inputs. <-- this requires inputs
# helper = seq2seq.TrainingHelper()
# #TODO probably good to use it with the dummy version of the model where we provide the decoder inputs
# EOS_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.EOS
eos_token_tensor = tf.constant(np.ones(shape=(batch_size, 1)) * eos_token,
dtype=tf.float32, name='eos_token_tensor')
WW = tf.Variable(self.rng.randn(num_units, self.TARGET_FEATURE_LEN), dtype=self.dtype, name='weights')
bb = tf.Variable(np.zeros(self.TARGET_FEATURE_LEN), dtype=self.dtype, name='bias')
helper = seq2seq.CustomHelper(
initialize_fn=self.get_initialize_fn(batch_size=batch_size, eos_token_tensor=eos_token_tensor),
sample_fn=self.get_sample_fn(batch_size=batch_size),
next_inputs_fn=self.get_next_inputs_fn(batch_size=batch_size, static_seq_len=output_seq_len, WW=WW,
bb=bb)
)
decoder = seq2seq.BasicDecoder(cell=rnn_cell(num_units=num_units),
helper=helper,
initial_state=encoder_final_state)
# https://stackoverflow.com/questions/44483159/how-to-use-tensorflow-v1-1-seq2seq-dynamic-decode
dyn_decode_outs = seq2seq.dynamic_decode(decoder=decoder,
output_time_major=False,
# we have batches as our major
impute_finished=False,
# we don't really care because it affects dynamic sequences
)
basic_decoder_output, _ = dyn_decode_outs
if verbose:
print basic_decoder_output
print
decoder_out_tensor = basic_decoder_output.rnn_output
# del final_state, final_sequence_lengths #not interesting <-- doc has it wrong, there is no final_sequence_lengths output
if verbose:
print decoder_out_tensor
print
with tf.name_scope('readout_layer'):
# just grab the dimensions
# decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(tf.shape(decoder_out_tensor))
decoder_dim = decoder_out_tensor.get_shape()[-1].value # last dimension's value
decoder_outputs_flat = tf.reshape(decoder_out_tensor, (-1, decoder_dim))
# TODO note that we are using exactly the same that we used above inside RNN cell (not sure if this is the best)
readouts = tf.add(tf.matmul(decoder_outputs_flat, WW), bb, "readouts")
if verbose:
print readouts
print
with tf.name_scope('predictions'):
# in purpose batch size goes last and output_seq_len goes second (we omit the target feature len because it is 1)
# because raw rnn stack things on top of each other and puts sequence length first
predictions = tf.reshape(readouts, shape=(batch_size, output_seq_len))
# decoder_logits = tf.reshape(decoder_logits_flat, (decoder_max_steps, decoder_batch_size, vocab_size))
# https://github.com/ematvey/tensorflow-seq2seq-tutorials/blob/master/2-seq2seq-advanced.ipynb
if verbose:
print predictions
print
with tf.name_scope('error'):
if cost_func == self.COST_FUNCS.HUBER_LOSS:
losses = huberLoss(y_true=targets, y_pred=predictions) # both have shape: (batch_size, target_len)
elif cost_func == self.COST_FUNCS.MSE:
losses = tfMSE(outputs=predictions, targets=targets)
else:
raise Exception("invalid or non supported cost function")
if verbose:
print losses
print
# fix error to exclude the EOS from the error calculation
mask = np.ones(shape=losses.get_shape())
mask[:, -1:] = 0
losses = losses * tf.constant(mask, dtype=tf.float32)
error = tf.reduce_mean(losses)
if verbose:
print error
print
with tf.name_scope('training_step'):
train_step = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(error)
init = tf.global_variables_initializer()
self.init = init
self.inputs = inputs
self.targets = targets
self.error = error
# self.is_training = is_training
self.train_step = train_step
self.predictions = predictions
# self.keep_prob_input = keep_prob_input
# self.keep_prob_hidden = keep_prob_hidden
return graph
def get_initialize_fn(self, batch_size, eos_token_tensor):
def initialize_fn():
"""callable that returns (finished, next_inputs) for the first iteration."""
finished = self.all_elems_non_finished(batch_size=batch_size)
next_inputs = eos_token_tensor
return (finished,
next_inputs)
return initialize_fn
def get_sample_fn(self, batch_size):
def sample_fn(time, outputs, state):
""" callable that takes (time, outputs, state) and emits tensor sample_ids."""
# in translation case you have the output being reduced to a number of logits which can be many if the
# vocabulary size is large and from this logits only one of them is the maximum, so we could consider this
# as the sample id. But here we do not have such case therefore we will use an invalid id index: -1
return -1 * tf.ones(shape=(batch_size,),
dtype=self.dtype) # we actually do not care about sampling here, we are not doing classification
return sample_fn
def get_next_inputs_fn(self, batch_size, static_seq_len, WW, bb):
def next_inputs_fn(time, outputs, state, sample_ids):
"""callable that takes (time, outputs, state, sample_ids) and emits (finished, next_inputs, next_state)"""
finished = self.get_seq_len_tensor(batch_size=batch_size, static_seq_len=static_seq_len) <= time + 1
next_inputs = tf.add(tf.matmul(outputs, WW), bb)
next_state = state
return (finished, next_inputs, next_state)
return next_inputs_fn
@staticmethod
def get_seq_len_tensor(batch_size, static_seq_len):
return tf.constant(np.full(shape=(batch_size,), fill_value=static_seq_len), dtype=tf.int32)
@staticmethod
def all_elems_non_finished(batch_size, finished=False):
# print "finished"
# print finished
# return tf.constant(np.repeat(finished, batch_size), dtype=tf.bool) # (0 >= decoder_lengths)
return tf.constant(np.full(shape=(batch_size,), fill_value=finished, dtype=np.bool),
dtype=tf.bool) # (0 >= decoder_lengths)
def get_debug_data(self, batch_size, input_len, output_seq_len):
inputs = tf.constant(value=np.arange(batch_size * input_len * self.FEATURE_LEN).reshape(
shape=(batch_size, input_len, self.FEATURE_LEN)), dtype=self.dtype, name="inputs")
targets = tf.constant(value=np.arange(batch_size * output_seq_len).reshape(shape=(batch_size, output_seq_len)),
dtype=self.dtype, name="targets")
return inputs, targets
#for naming purposes alone
#learn more here: https://stackoverflow.com/questions/19896900/how-to-change-the-name-of-a-gist-in-github
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment