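# RNN benchmark tool (gist by @goldsborough, February 2018): trains an LSTM,
# GRU, vanilla RNN, or custom "scrappy" cell on synthetic or file-based
# time-series streams and evaluates it by predicting the streams
# autoregressively. See parse_arguments() and the docstring of main() for usage.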
import argparse
import collections
import datetime
import math
import numpy as np
import os
import sys
import tensorflow as tf
import time
import warnings
import matplotlib.pyplot as plot
import seaborn
tf.set_random_seed(sum(map(ord, 'chicken')))
warnings.filterwarnings('ignore')
seaborn.set_style('dark')
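# Containers for the loaded/generated data and for the tensors and ops that
# make up the compiled graph (see build_graph below).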
Data = collections.namedtuple('Data', 'samples, number_of_streams, names')
Graph = collections.namedtuple('Graph', ['x',
'y',
'initial_state',
'final_state',
'loss',
'predictions',
'accuracy',
'summaries',
'learning_rate',
'dropout',
'train'])
DATA_STREAMS = dict(sine=lambda t: np.sin(1/20 * t),
sine1=lambda t: np.sin(1/20 * t - 1),
sine2=lambda t: np.sin(1/20 * t - 2),
sine3=lambda t: np.sin(1/20 * t - 3),
sine4=lambda t: np.sin(1/20 * t - 4),
sine5=lambda t: np.sin(1/20 * t - 5),
cosine=lambda t: np.cos(1/20 * t),
linear=lambda t: 1/100 * t,
sine_up=lambda t: 2 * np.sin(1/20 * t) + t/10000,
sawtooth=lambda t: (t/100 - np.floor(t/100)) - 0.5,
mod=lambda t: (t/100 % 1),
sinsq=lambda t: np.sin((t/10)**2),
delta10=lambda t: 1 if t % 10 == 0 else 0,
delta10_1=lambda t: 1 if t % 10 == 1 else 0,
delta10_2=lambda t: 1 if t % 10 == 2 else 0,
delta10_3=lambda t: 1 if t % 10 == 3 else 0,
delta10_4=lambda t: 1 if t % 10 == 4 else 0,
delta100=lambda t: 1 if t % 100 == 0 else 0)
INITIALIZERS = dict(orthogonal=tf.orthogonal_initializer(),
random_uniform=tf.random_uniform_initializer(),
random_normal=tf.random_normal_initializer(),
truncated_normal=tf.truncated_normal_initializer(),
glorot=None)
REGULARIZERS = dict(l1=tf.contrib.layers.l1_regularizer(0.1),
l2=tf.contrib.layers.l2_regularizer(0.1))
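# A single fully-connected layer, y = activation(x W + b), creating its
# variables in the enclosing variable scope.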
def dense(inputs,
output_size,
activation=None,
initializer=None,
regularizer=None):
weights = tf.get_variable('W',
[inputs.shape[1], output_size],
initializer=initializer,
regularizer=regularizer)
bias = tf.get_variable('b',
[output_size],
initializer=tf.constant_initializer(0.1))
output = tf.matmul(inputs, weights) + bias
if activation is not None:
output = activation(output)
return output
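# A small feedforward network: `number_of_layers - 1` hidden dense layers
# followed by a final projection to `output_size`. With zero layers the input
# is returned unchanged.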
def feedforward(inputs,
output_size,
number_of_layers,
units_per_layer,
hidden_activation,
final_activation=None,
initializer=None,
regularizer=None):
if number_of_layers == 0:
return inputs
tensors = inputs
for layer in range(1, number_of_layers):
with tf.variable_scope('hidden-{0}'.format(layer)):
tensors = dense(tensors,
units_per_layer,
hidden_activation,
initializer,
regularizer)
with tf.variable_scope('final'):
output = dense(tensors,
output_size,
final_activation,
initializer,
regularizer)
return output
def leaky_relu(x, alpha=0.05):
return tf.maximum(alpha * x, x)
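# A custom cell: [input, state] is projected down to the state size with a
# dense layer, then passed through a small feedforward network; the result is
# returned as both the output and the next state.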
class ScrappyCell(tf.contrib.rnn.RNNCell):
def __init__(self,
state_size,
hidden_units,
hidden_layers,
hidden_activation=leaky_relu,
final_activation=tf.tanh,
reuse=None):
self._state_size = state_size
self._reuse = reuse
self._hidden_units = hidden_units
self._hidden_layers = hidden_layers
self._hidden_activation = hidden_activation
self._final_activation = final_activation
status = ('Initializing Scrappy cell with {0} units in {1} layers '
'with {2} as hidden activation and {3} as final activation')
print(status.format(hidden_units,
hidden_layers,
hidden_activation.__name__,
final_activation.__name__))
@property
def state_size(self):
return self._state_size
@property
def output_size(self):
return self._state_size
def __call__(self, inputs, state, scope=None):
with tf.variable_scope(scope or type(self).__name__, reuse=self._reuse):
concatenated = tf.concat([inputs, state], axis=1)
state = dense(concatenated, self.output_size, self._final_activation)
output = feedforward(inputs=state,
output_size=self.output_size,
number_of_layers=self._hidden_layers,
units_per_layer=self._hidden_units,
hidden_activation=self._hidden_activation,
final_activation=self._final_activation)
return output, output
class RNNCell(tf.contrib.rnn.RNNCell):
def __init__(self, state_size, reuse=None):
self._state_size = state_size
self._reuse = reuse
@property
def state_size(self):
return self._state_size
@property
def output_size(self):
return self._state_size
def __call__(self, inputs, state, scope=None):
with tf.variable_scope(scope or type(self).__name__, reuse=self._reuse):
input_size = inputs.shape[1] + self._state_size
weights = tf.get_variable('W', [input_size, self._state_size])
bias = tf.get_variable('b', [self._state_size])
concatenated = tf.concat([inputs, state], axis=1)
output = tf.matmul(concatenated, weights) + bias
return output, output
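# Layer normalization: normalize each row of `tensor` to zero mean and unit
# variance across its features, then apply a learned per-feature gain and bias.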
def layer_normalize(tensor,
scope=None,
epsilon=1e-5,
initial_gain=1.0,
initial_bias=0.0):
assert len(tensor.shape) == 2
input_size = tensor.shape[1]
with tf.variable_scope('{0}/layer_norm'.format(scope)):
gain_initializer = tf.constant_initializer(initial_gain)
gain = tf.get_variable('gain',
shape=[input_size],
initializer=gain_initializer)
bias_initializer = tf.constant_initializer(initial_bias)
bias = tf.get_variable('bias',
shape=[input_size],
initializer=bias_initializer)
mean, variance = tf.nn.moments(tensor, axes=[1], keep_dims=True)
z_shift = (tensor - mean) / tf.sqrt(variance + epsilon)
return gain * z_shift + bias
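# A basic LSTM cell that computes the input, output, and forget gates from a
# single dense projection of [input, hidden_state], with optional layer
# normalization of the four pre-activation blocks.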
class LSTMCell(tf.contrib.rnn.RNNCell):
def __init__(self,
state_size,
forget_bias=1.0,
activation=tf.tanh,
final_activation=tf.tanh,
layer_normalize=False,
reuse=None):
self._state_size = state_size
self._forget_bias = forget_bias
self._activation = activation or (lambda z: z)
self._final_activation = final_activation or (lambda z: z)
self._reuse = reuse
self._layer_normalize = layer_normalize
if layer_normalize:
print('Using layer normalization')
@property
def state_size(self):
return tf.contrib.rnn.LSTMStateTuple(self._state_size, self._state_size)
@property
def output_size(self):
return self._state_size
def perform_layer_normalization(self, values):
scopes = ['input_gate', 'output_gate', 'forget_gate', 'input']
return [layer_normalize(i, scopes[n]) for n, i in enumerate(values)]
def __call__(self, inputs, state, scope=None):
with tf.variable_scope(scope or type(self).__name__, reuse=self._reuse):
cell_state, hidden_state = state
concatenated = tf.concat([inputs, hidden_state], axis=1)
values = dense(concatenated, 4 * self._state_size)
values = tf.split(values, 4, axis=1)
if self._layer_normalize:
values = self.perform_layer_normalization(values)
input_gate = tf.sigmoid(values[0], 'i')
output_gate = tf.sigmoid(values[1], 'o')
forget_gate = tf.sigmoid(values[2] + self._forget_bias, 'f')
intermediate_input = self._activation(values[3])
cell_state *= forget_gate
cell_state += input_gate * intermediate_input
output = output_gate * self._activation(cell_state)
new_state = tf.contrib.rnn.LSTMStateTuple(cell_state, output)
return output, new_state
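# Splits the data into `number_of_batches` contiguous slices and yields
# (x, y) pairs of length `sequence_length`, where y is x shifted one step
# into the future.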
def generate_single_epoch(data, sequence_length, number_of_batches):
batch_length = len(data) // number_of_batches
batches = []
for batch_index in range(number_of_batches):
start = batch_length * batch_index
end = batch_length * (batch_index + 1)
batches.append(data[start:end])
epoch_size = (batch_length - 1) // sequence_length
assert epoch_size > 0
batches = np.array(batches, dtype=np.float32)
for i in range(epoch_size):
x = batches[:, i * sequence_length: (i + 1) * sequence_length]
y = batches[:, i * sequence_length + 1: (i + 1) * sequence_length + 1]
yield (x, y)
def generate_epochs(data, arguments):
if arguments.training_points:
data = data[:arguments.training_points]
for _ in range(arguments.epochs):
yield generate_single_epoch(data,
arguments.sequence_length,
arguments.batch_size)
def final_feedforward(y, hyper, number_of_streams):
with tf.variable_scope('final_feedforward'):
y = tf.reshape(y, [-1, int(y.shape[-1])])
predictions = feedforward(inputs=y,
output_size=number_of_streams,
number_of_layers=hyper.final_layers,
units_per_layer=hyper.final_units,
hidden_activation=leaky_relu,
initializer=hyper.initializer,
regularizer=hyper.regularizer)
return predictions
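# Builds the recurrent core: one of the cell types above, optionally wrapped in
# input dropout, stacked `model_layers` deep, and wrapped in output dropout.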
def make_rnn(hyper, dropout):
status = 'Using {0} cells with a state size of {1}'
print(status.format(hyper.model, hyper.state_size))
assert hyper.model in ['lstm', 'rnn', 'gru', 'scrappy']
if hyper.model == 'lstm':
# if hyper.layer_normalize:
cell = LSTMCell(hyper.state_size,
layer_normalize=hyper.layer_normalize)
# # else:
# if hyper.use_peepholes:
# print('Using peephole connections in LSTM cells')
# cell = tf.contrib.rnn.LSTMCell(hyper.state_size,
# use_peepholes=hyper.use_peepholes,
# state_is_tuple=True,
# activation=lambda x: x)
elif hyper.model == 'rnn':
cell = RNNCell(hyper.state_size)
# cell = tf.contrib.rnn.BasicRNNCell(hyper.state_size)
elif hyper.model == 'gru':
cell = tf.contrib.rnn.GRUCell(hyper.state_size)
elif hyper.model == 'scrappy':
cell = ScrappyCell(hyper.state_size,
hyper.hidden_units,
hyper.hidden_layers)
if hyper.dropout < 1:
print('Using dropout with a rate of {0:.3f}'.format(1 - hyper.dropout))
cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=dropout)
layers = [cell] * hyper.model_layers
rnn = tf.contrib.rnn.MultiRNNCell(layers)
rnn = tf.contrib.rnn.DropoutWrapper(rnn, output_keep_prob=dropout)
return rnn
def configure_learning_rate(hyper, global_step):
learning_rate = tf.constant(hyper.learning_rate)
if hyper.decay is not None:
input_size = hyper.batch_size * hyper.sequence_length
decay_steps = math.ceil(hyper.data_points / input_size)
status = 'Decaying learning rate from {0} by {1} every {2} steps'
print(status.format(hyper.learning_rate, hyper.decay, decay_steps))
learning_rate = tf.train.exponential_decay(learning_rate,
decay_steps=decay_steps,
decay_rate=hyper.decay,
global_step=global_step,
staircase=True)
tf.summary.scalar('learning_rate', learning_rate)
return learning_rate
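# Optionally applies a 1-D convolution (plus ReLU) over the time axis of the
# input streams before they are fed to the RNN.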
def convolve(x, hyper):
if not hyper.convolution_filters:
return x
message = ('Convolving streams with filter size = {0}, '
'stride = {1}, filters = {2}, padding = {3}')
print(message.format(hyper.convolution_size,
hyper.convolution_stride,
hyper.convolution_filters,
hyper.convolution_padding))
with tf.variable_scope('convolution'):
shape = (hyper.convolution_size, # The width of the filter
int(x.shape[-1]), # The number of input channels to convolve
hyper.convolution_filters) # The number of output channels
filters = tf.get_variable('filters',
initializer=hyper.initializer,
regularizer=hyper.regularizer,
shape=shape)
biases = tf.get_variable('biases',
shape=[hyper.convolution_filters],
initializer=tf.constant_initializer(0.1))
tf.summary.histogram('filters', filters)
tf.summary.histogram('biases', biases)
convolved = tf.nn.conv1d(value=x,
filters=filters,
stride=hyper.convolution_stride,
padding=hyper.convolution_padding.upper())
result = tf.nn.relu(convolved + biases)
assert int(result.shape[-1]) == hyper.convolution_filters
return result
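# Assembles the full graph: placeholders, optional convolution, recurrent core,
# final feedforward projection, mean squared error loss, epsilon-based
# accuracy, summaries, and an Adam training op with optional learning-rate
# decay.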
def build_graph(hyper, number_of_streams):
start = time.time()
tf.reset_default_graph()
x_input = tf.placeholder(tf.float32, [None, None, number_of_streams])
y_hat = tf.placeholder(tf.float32, [None, None, number_of_streams])
x = convolve(x_input, hyper)
# offset = x[0, 0]
# x %= 1
dropout = tf.Variable(hyper.dropout, trainable=False)
rnn = make_rnn(hyper, dropout)
batch_size = tf.shape(x_input)[0]
initial_state = rnn.zero_state(batch_size, dtype=tf.float32)
y, final_state = tf.nn.dynamic_rnn(rnn, x, initial_state=initial_state)
predictions = final_feedforward(y, hyper, number_of_streams)
# sequence_length = tf.shape(x_input)[1]
# unmod = offset + (tf.slice(np.arange(hyper.sequence_length, dtype=np.float32), [0], [sequence_length]) / 100)
# predictions += tf.reshape(unmod, tf.shape(predictions))
labels = tf.reshape(y_hat, [-1, number_of_streams])
positives = tf.abs(predictions - labels) < hyper.accuracy_epsilon
accuracy = tf.reduce_mean(tf.cast(positives, dtype=tf.float32))
total_loss = tf.reduce_mean(tf.square(predictions - labels))
tf.summary.scalar('accuracy', accuracy)
tf.summary.scalar('loss', total_loss)
global_step = tf.Variable(0, trainable=False)
learning_rate = configure_learning_rate(hyper, global_step)
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(total_loss, global_step=global_step)
summaries = tf.summary.merge_all()
print('Compiled graph in: {0:.3f}s'.format(time.time() - start))
return Graph(x_input,
y_hat,
initial_state,
final_state,
total_loss,
predictions,
accuracy,
summaries,
learning_rate,
dropout,
train)
def prediction_file(arguments, index):
if arguments.dry:
return open('/tmp/ignore', 'w')
file_name = 'epoch-{0}'.format(index)
path = os.path.join(arguments.workspace, 'predictions', file_name)
return open(path, 'w')
def plot_predictions(arguments, predictions, data):
if not arguments.plot and not arguments.plot_all:
return
real = list(zip(*data.samples))
matrix = zip(list(zip(*predictions)), real, data.names)
predict_start = arguments.prediction_start + arguments.predict_ahead
predict_end = predict_start + len(predictions)
predict_range = list(range(predict_start, predict_end))
for index, (prediction, real, name) in enumerate(matrix, start=1):
print("Plotting result for '{0}'...".format(name))
handles = []
plot.figure(index)
plot.title(arguments.workspace)
plot.subplot(111)
label = '{0}-predicted'.format(name)
handles += plot.plot(predict_range, prediction, 'r--', label=label)
plot.subplot(111)
handles += plot.plot(real, 'g-', label='{0}-real'.format(name))
if arguments.training_points:
plot.axvline(arguments.training_points, linestyle='-.')
if arguments.prediction_start:
plot.axvline(arguments.prediction_start, linestyle=':')
plot.legend(loc='upper center',
handles=handles,
bbox_to_anchor=(0.95, 1),
fontsize='large')
plot.show()
def process_prediction(prediction, data, index, mean_error, output):
data = data.samples
zero = np.zeros_like(data[0])
error = np.abs(prediction - data[index]) if index < len(data) else zero
mean_error += (error - mean_error) / index
status = '{0} {1} {2} {3}\n'
label = data[index] if index < len(data) else zero
error_string = ', '.join(map(str, mean_error))
prediction = ', '.join(map(str, prediction))
label = ', '.join(map(str, label))
status = status.format(index, prediction, label, error_string)
if output.name != '/tmp/ignore':
output.write(status)
return mean_error
def train_online(graph, experience):
print('Training online (batch of {0}) ...'.format(len(experience)))
session = tf.get_default_session()
x, y = experience[:-1], experience[1:]
feed_dict = {graph.x: [x], graph.y: [y]}
session.run(graph.train, feed_dict)
experience.clear()
def make_example(arguments, data, sequence_length):
shape = (arguments.batch_size, sequence_length, data.number_of_streams)
return np.zeros(shape=shape, dtype=np.float32)
def roll_forward(graph, arguments, data, offset):
# print('\nRolling {0} steps forward'.format(arguments.predict_ahead - 1))
example = make_example(arguments, data, arguments.predict_ahead - 1)
example[0] = data.samples[offset:offset + arguments.predict_ahead - 1]
session = tf.get_default_session()
feed_dict = {graph.x: example, graph.dropout: 1.0}
fetches = [graph.predictions, graph.final_state]
prediction, state = session.run(fetches, feed_dict)
return state, [prediction[-1:]]
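# Generates predictions autoregressively starting at `prediction_start`: each
# step feeds the previous prediction (or the ground truth, with
# --help-prediction) back in as the next input while carrying the RNN state
# forward, optionally training online on the observed samples.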
def generate_samples(graph, arguments, data, epoch_index):
offset = arguments.prediction_start
session = tf.get_default_session()
state = None
if arguments.predict_ahead > 0:
state, generated = roll_forward(graph, arguments, data, offset)
else:
generated = [data.samples[offset]]
example = make_example(arguments, data, 1)
error = 0
experience = []
with prediction_file(arguments, epoch_index) as output:
for index in range(1 + offset, 1 + offset + arguments.predict):
if arguments.train_online and index < len(data.samples):
experience.append(data.samples[index])
if len(experience) == arguments.online_batch_size:
train_online(graph, experience)
index += arguments.predict_ahead
if arguments.help_prediction and index < len(data.samples):
example[0, 0] = data.samples[index]
else:
example[0, 0] = generated[-1]
feed_dict = {graph.x: example, graph.dropout: 1.0}
if state is not None:
feed_dict[graph.initial_state] = state
fetches = [graph.predictions, graph.final_state]
prediction, state = session.run(fetches, feed_dict)
prediction = prediction[0]
assert len(prediction) == data.number_of_streams
generated.append(prediction)
error = process_prediction(prediction, data, index, error, output)
error_string = ', '.join(
['{0} = {1:.5f}'.format(k, v) for k, v in zip(data.names, error)])
print(' | Prediction Error: {0}'.format(error_string))
if epoch_index == 'final':
plot_predictions(arguments, generated, data)
return error
def training_step(graph, x, y, state, write_summary):
session = tf.get_default_session()
feed_dict = {graph.x: x, graph.y: y}
if state is not None:
feed_dict[graph.initial_state] = state
fetches = [graph.loss,
graph.accuracy,
graph.final_state,
graph.summaries,
graph.learning_rate,
graph.train]
values = session.run(fetches, feed_dict)
loss, accuracy, state, summaries, learning_rate = values[:-1]
write_summary(summaries)
return loss, accuracy, state, learning_rate
def make_summary_writer(arguments, session):
if not arguments.tensorboard or arguments.dry:
return lambda _: _
path = os.path.join(arguments.workspace, 'tensorboard')
summary_writer = tf.summary.FileWriter(path, graph=session.graph)
global_step = 0
print('Writing TensorBoard logs to {0}'.format(path))
def write_summary(summary):
nonlocal global_step
summary_writer.add_summary(summary, global_step=global_step)
global_step += 1
return write_summary
def print_status(variables):
status = ('\r Epoch: {:<4} | Iteration: {:<4} | '
'Loss: {:.10f} | Accuracy: {:.8f} | '
'Learning Rate: {:.8f}')
status = status.format(variables['epoch_index'],
variables['iteration'],
variables['loss'],
variables['accuracy'],
variables['learning_rate'])
print(status, end='')
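# The main training loop: iterates over epochs of (x, y) batches, runs the
# training op, periodically generates samples as a checkpoint, and saves the
# model on Ctrl-C (a second Ctrl-C stops training).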
def _train_graph(data, graph, arguments, save):
losses, accuracies, errors = [], [], []
with tf.Session() as session:
tf.global_variables_initializer().run()
write_summary = make_summary_writer(arguments, session)
epoch_generator = generate_epochs(data.samples, arguments)
for epoch_index, epoch in enumerate(epoch_generator, start=1):
do_checkpoint = arguments.checkpoint and epoch_index > 1
if do_checkpoint and epoch_index % arguments.checkpoint == 0:
error = generate_samples(graph, arguments, data, epoch_index)
errors.append(error)
stop = False
state = None
for iteration, (x, y) in enumerate(epoch, start=1):
try:
values = training_step(graph, x, y, state, write_summary)
loss, accuracy, state, learning_rate = values
print_status(locals())
losses.append(loss)
accuracies.append(accuracy)
except KeyboardInterrupt:
if stop: break
stop = True
save(session)
if stop: break
error = generate_samples(graph, arguments, data, 'final')
errors.append(error)
return losses, accuracies, errors
def make_checkpoint(arguments):
file_name = 'checkpoint-{0}'.format(time.strftime('%H-%M-%S'))
checkpoint = os.path.join(arguments.workspace, 'checkpoints', file_name)
print("Checkpoint is: '{0}'".format(checkpoint))
return checkpoint
def train_graph(graph, data, arguments):
start = time.time()
saver = tf.train.Saver()
checkpoint = make_checkpoint(arguments)
if arguments.dry:
save = lambda _: _
else:
save = lambda session: saver.save(session, checkpoint)
metrics = _train_graph(data, graph, arguments, save)
print('\nTraining time: {0:.3f} s'.format(time.time() - start))
return metrics
def parse_arguments(command_line_arguments):
parser = argparse.ArgumentParser(description='RNN benchmark tool')
parser.add_argument('-e',
'--epochs',
type=int,
default=1000,
help='Number of epochs to run')
parser.add_argument('-l',
'--sequence-length',
type=int,
default=100,
help='Length of the sequence in each batch')
parser.add_argument('-b',
'--batch-size',
type=int,
default=1,
help='The batch size to vectorize training')
parser.add_argument('-s',
'--state-size',
type=int,
default=128,
help='The dimension of the state')
parser.add_argument('-o',
'--model-layers',
type=int,
default=1,
help='The number of (overall) layers of the model')
parser.add_argument('--hidden-layers',
type=int,
default=1,
help='The number of hidden layers inside a '
'(deep) RNN or scrappy cell')
parser.add_argument('--hidden-units',
type=int,
default=128,
help='The number of hidden units inside a '
'(deep) RNN or scrappy cell')
parser.add_argument('--final-layers',
type=int,
default=1,
help='The number of hidden layers used to process the '
'output of the model')
parser.add_argument('--final-units',
type=int,
default=128,
help='The number of hidden units used to process the '
'output of the model, in each layer')
parser.add_argument('--training-points',
type=int,
default=0,
help='The amount of data to train on')
parser.add_argument('-r',
'--learning-rate',
type=float,
default=1e-4,
help='The learning rate to use')
parser.add_argument('--decay',
type=float,
help='The learning rate decay rate per epoch')
parser.add_argument('--accuracy-epsilon',
type=float,
default=0.01,
help='The tolerance within which a prediction '
'counts as a true positive')
parser.add_argument('-c',
'--checkpoint',
type=int,
default=100,
help='How often to generate samples. If not '
'supplied, generates only at the end.')
parser.add_argument('--workspace',
default=None,
help='The name of the workspace for the run. '
'If not supplied, an appropriate one will '
'be generated')
parser.add_argument('-d',
'--data-points',
type=int,
help='How much data to generate as input')
parser.add_argument('--prediction-start',
type=int,
default=0,
help='At what point in the data to start predicting')
parser.add_argument('-p',
'--predict',
type=int,
default=10000,
help='How many data points to predict '
'on every checkpoint')
parser.add_argument('--dropout',
type=float,
default=0.0,
help='The dropout probability')
parser.add_argument('--layer-normalize',
action='store_true',
help='Whether to use layer normalization')
parser.add_argument('--initializer',
choices=INITIALIZERS.keys(),
default='orthogonal',
help='The initializer to use for weights')
parser.add_argument('--regularizer',
choices=REGULARIZERS.keys(),
help='The regularizer to use for weights')
parser.add_argument('--use-peepholes',
action='store_true',
help='Whether to use peepholes')
parser.add_argument('--convolution-filters',
type=int,
help='If set, will apply 1-D convolutions to the '
'signal with the given number of filters.')
parser.add_argument('--convolution-size',
type=int,
default=3,
help='If --convolution-filters is set, uses this '
'filter size for the convolution')
parser.add_argument('--convolution-stride',
type=int,
default=1,
help='If --convolution-filters is set, uses this '
'stride for the convolution')
parser.add_argument('--convolution-padding',
type=str,
choices=('valid', 'same'),
default='same',
help='If --convolution-filters is set, uses this '
'padding technique for the convolution')
parser.add_argument('--predict-ahead',
type=int,
default=0,
help='The offset into the future for prediction')
parser.add_argument('--train-online',
action='store_true',
help='Whether to do online training during prediction')
parser.add_argument('--online-batch-size',
type=int,
help='How many samples to collect online before '
'performing a single training step')
parser.add_argument('--z-shift',
action='store_true',
help='Whether to z-shift the data')
parser.add_argument('--help-prediction',
action='store_true',
help='Whether to feed in ground truth '
'instead of predictions during prediction')
parser.add_argument('--plot',
action='store_true',
help='Whether to plot the predictions')
parser.add_argument('--plot-all',
action='store_true',
help='Whether to plot everything')
parser.add_argument('--tensorboard',
action='store_true',
help='Whether to enable TensorBoard')
parser.add_argument('--dry',
action='store_true',
help='Dry run (does not store anything to disk)')
parser.add_argument('--from-files',
action='store_true',
help='Assume the streams are files to load, not '
'names of streams to generate internally')
parser.add_argument('model',
choices=['lstm', 'gru', 'rnn', 'scrappy'],
help='The kind of model to run')
parser.add_argument('streams',
nargs='+',
help='The kind of streams to generate')
arguments = parser.parse_args(command_line_arguments)
if not arguments.from_files:
if not all(s in DATA_STREAMS.keys() for s in arguments.streams):
streams = ', '.join(DATA_STREAMS.keys())
raise KeyError('Streams must be one of {{{}}}'.format(streams))
if arguments.workspace is None:
parts = [arguments.model,
'-'.join(arguments.streams),
arguments.model_layers,
arguments.state_size,
arguments.hidden_layers,
arguments.hidden_units,
arguments.final_layers,
arguments.final_units,
datetime.datetime.now().isoformat()]
arguments.workspace = '-'.join(map(str, parts))
arguments.dropout = 1.0 - arguments.dropout
if arguments.initializer:
print('Using {0} initialization'.format(arguments.initializer))
arguments.initializer = INITIALIZERS.get(arguments.initializer)
if arguments.regularizer:
print('Using {0} regularization'.format(arguments.regularizer))
arguments.regularizer = REGULARIZERS[arguments.regularizer]
if arguments.train_online and not arguments.online_batch_size:
message = 'Setting online batch size to sequence length ({0})'
print(message.format(arguments.sequence_length))
arguments.online_batch_size = arguments.sequence_length
return arguments
def generate_data(streams, data_points):
assert data_points is not None
message = "Generating {0} data points each with streams: {1}"
print(message.format(data_points, ', '.join(streams)))
data = []
for t in range(data_points):
data.append([DATA_STREAMS[stream](t) for stream in streams])
return data
def read_streams_from_disk(streams, data_points):
data = []
for stream in streams:
with open(stream, 'r') as source:
stream_data = []
for count, sample in enumerate(source):
if count == data_points:
break
if sample and not sample.isspace():
stream_data.append(float(sample))
message = "Loaded {0} data points from file '{1}'"
print(message.format(len(stream_data), stream))
data.append(stream_data)
return list(zip(*data))
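# Standardizes each stream to zero mean and unit variance (z-scoring).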
def z_shift(data):
data = np.array(data)
mean = np.mean(data, axis=0)
print('Data has mean {0}'.format(', '.join(map(str, mean))))
variance = np.mean(np.square(data), axis=0) - np.square(mean)
stddev = np.sqrt(variance)
print('Data has stddev {0}'.format(', '.join(map(str, stddev))))
z = (data - mean) / stddev
assert np.all(np.abs(np.mean(z, axis=0)) < 1e-9)
return z.tolist()
def load_data(arguments):
streams, data_points = arguments.streams, arguments.data_points
if arguments.from_files:
data = read_streams_from_disk(streams, data_points)
else:
data = generate_data(streams, data_points)
if arguments.z_shift:
print('Z-shifting data')
data = z_shift(data)
if data_points is None:
arguments.data_points = len(data)
return Data(data, number_of_streams=len(streams), names=streams)
def setup_workspace(arguments):
if arguments.dry:
return
inner_most = os.path.join(arguments.workspace, 'checkpoints')
os.makedirs(inner_most, exist_ok=True)
inner_most = os.path.join(arguments.workspace, 'predictions')
os.makedirs(inner_most, exist_ok=True)
print('Created directory {0}'.format(arguments.workspace))
def write_metrics(arguments, loss, accuracy):
if arguments.dry:
return
path = os.path.join(arguments.workspace, 'metrics.csv')
print('Writing metrics to {0} ...'.format(path))
with open(path, 'w') as output:
for l, a in zip(loss, accuracy):
output.write('{0} {1}\n'.format(l, a))
def save_results(arguments, metrics):
loss, accuracy, error = metrics
if not arguments.dry:
write_metrics(arguments, loss, accuracy)
if not arguments.plot_all:
return
plot.figure(1)
plot.subplot(111)
loss_plot, = plot.plot(loss, 'r-', label='loss')
plot.subplot(111)
accuracy_plot, = plot.plot(accuracy, 'b-', label='accuracy')
plot.title(arguments.workspace + ' (loss/accuracy)')
plot.legend(loc='upper center',
handles=[loss_plot, accuracy_plot],
bbox_to_anchor=(0.95, 1),
fontsize='x-large')
plot.figure(2)
plot.title('Mean error over time')
plot.plot(error, 'r-')
plot.show()
def main():
'''
python rnn_tester.py --learning-rate=0.01 --decay=0.99 --epochs=1000 --data-points=5000 --predict=10000 --batch-size=8 --model-layers=3 --state-size=32 --final-layers=2 --final-units=128 --plot --dry --from-files lstm sine.csv
'''
arguments = parse_arguments(sys.argv[1:])
print(arguments)
setup_workspace(arguments)
data = load_data(arguments)
graph = build_graph(arguments, data.number_of_streams)
metrics = train_graph(graph, data, arguments)
save_results(arguments, metrics)
if __name__ == '__main__':
main()