import argparse | |
import collections | |
import datetime | |
import math | |
import numpy as np | |
import os | |
import sys | |
import tensorflow as tf | |
import time | |
import warnings | |
import matplotlib.pyplot as plot | |
import seaborn | |
tf.set_random_seed(sum(map(ord, 'chicken'))) | |
warnings.filterwarnings('ignore') | |
seaborn.set_style('dark') | |
Data = collections.namedtuple('Data', 'samples, number_of_streams, names') | |
Graph = collections.namedtuple('Graph', ['x', | |
'y', | |
'initial_state', | |
'final_state', | |
'loss', | |
'predictions', | |
'accuracy', | |
'summaries', | |
'learning_rate', | |
'dropout', | |
'train']) | |
DATA_STREAMS = dict(sine=lambda t: np.sin(1/20 * t), | |
sine1=lambda t: np.sin(1/20 * t - 1), | |
sine2=lambda t: np.sin(1/20 * t - 2), | |
sine3=lambda t: np.sin(1/20 * t - 3), | |
sine4=lambda t: np.sin(1/20 * t - 4), | |
sine5=lambda t: np.sin(1/20 * t - 5), | |
cosine=lambda t: np.cos(1/20 * t), | |
linear=lambda t: 1/100 * t, | |
sine_up=lambda t: 2 * np.sin(1/20 * t) + t/10000, | |
sawtooth=lambda t: (t/100 - np.floor(t/100)) - 0.5, | |
mod=lambda t: (t/100 % 1), | |
sinsq=lambda t: np.sin((t/10)**2), | |
delta10=lambda t: 1 if t % 10 == 0 else 0, | |
delta10_1=lambda t: 1 if t % 10 == 1 else 0, | |
delta10_2=lambda t: 1 if t % 10 == 2 else 0, | |
delta10_3=lambda t: 1 if t % 10 == 3 else 0, | |
delta10_4=lambda t: 1 if t % 10 == 4 else 0, | |
delta100=lambda t: 1 if t % 100 == 0 else 0) | |
INITIALIZERS = dict(orthogonal=tf.orthogonal_initializer(), | |
random_uniform=tf.random_uniform_initializer(), | |
random_normal=tf.random_normal_initializer(), | |
truncated_normal=tf.truncated_normal_initializer(), | |
                    glorot=None)  # None -> tf.get_variable's default initializer
REGULARIZERS = dict(l1=tf.contrib.layers.l1_regularizer(0.1), | |
l2=tf.contrib.layers.l2_regularizer(0.1)) | |
def dense(inputs, | |
output_size, | |
activation=None, | |
initializer=None, | |
regularizer=None): | |
weights = tf.get_variable('W', | |
[inputs.shape[1], output_size], | |
initializer=initializer, | |
regularizer=regularizer) | |
bias = tf.get_variable('b', | |
[output_size], | |
initializer=tf.constant_initializer(0.1)) | |
output = tf.matmul(inputs, weights) + bias | |
if activation is not None: | |
output = activation(output) | |
return output | |
def feedforward(inputs, | |
output_size, | |
number_of_layers, | |
units_per_layer, | |
hidden_activation, | |
final_activation=None, | |
initializer=None, | |
regularizer=None): | |
if number_of_layers == 0: | |
return inputs | |
tensors = inputs | |
for layer in range(1, number_of_layers): | |
with tf.variable_scope('hidden-{0}'.format(layer)): | |
tensors = dense(tensors, | |
units_per_layer, | |
hidden_activation, | |
initializer, | |
regularizer) | |
with tf.variable_scope('final'): | |
output = dense(tensors, | |
output_size, | |
final_activation, | |
initializer, | |
regularizer) | |
return output | |
def leaky_relu(x, alpha=0.05): | |
return tf.maximum(alpha * x, x) | |
class ScrappyCell(tf.contrib.rnn.RNNCell): | |
def __init__(self, | |
state_size, | |
hidden_units, | |
hidden_layers, | |
hidden_activation=leaky_relu, | |
final_activation=tf.tanh, | |
reuse=None): | |
self._state_size = state_size | |
self._reuse = reuse | |
self._hidden_units = hidden_units | |
self._hidden_layers = hidden_layers | |
self._hidden_activation = hidden_activation | |
self._final_activation = final_activation | |
status = ('Initializing Scrappy cell with {0} units in {1} layers ' | |
'with {2} as hidden activation and {3} as final activation') | |
print(status.format(hidden_units, | |
hidden_layers, | |
hidden_activation.__name__, | |
final_activation.__name__)) | |
@property | |
def state_size(self): | |
return self._state_size | |
@property | |
def output_size(self): | |
return self._state_size | |
def __call__(self, inputs, state, scope=None): | |
with tf.variable_scope(scope or type(self).__name__, reuse=self._reuse): | |
concatenated = tf.concat([inputs, state], axis=1) | |
state = dense(concatenated, self.output_size, self._final_activation) | |
output = feedforward(inputs=state, | |
output_size=self.output_size, | |
number_of_layers=self._hidden_layers, | |
units_per_layer=self._hidden_units, | |
hidden_activation=self._hidden_activation, | |
final_activation=self._final_activation) | |
return output, output | |
class RNNCell(tf.contrib.rnn.RNNCell): | |
def __init__(self, state_size, reuse=None): | |
self._state_size = state_size | |
self._reuse = reuse | |
@property | |
def state_size(self): | |
return self._state_size | |
@property | |
def output_size(self): | |
return self._state_size | |
def __call__(self, inputs, state, scope=None): | |
with tf.variable_scope(scope or type(self).__name__, reuse=self._reuse): | |
input_size = inputs.shape[1] + self._state_size | |
weights = tf.get_variable('W', [input_size, self._state_size]) | |
bias = tf.get_variable('b', [self._state_size]) | |
concatenated = tf.concat([inputs, state], axis=1) | |
output = tf.matmul(concatenated, weights) + bias | |
return output, output | |
def layer_normalize(tensor, | |
scope=None, | |
epsilon=1e-5, | |
initial_gain=1.0, | |
initial_bias=0.0): | |
assert len(tensor.shape) == 2 | |
input_size = tensor.shape[1] | |
with tf.variable_scope('{0}/layer_norm'.format(scope)): | |
gain_initializer = tf.constant_initializer(initial_gain) | |
gain = tf.get_variable('gain', | |
shape=[input_size], | |
initializer=gain_initializer) | |
bias_initializer = tf.constant_initializer(initial_bias) | |
bias = tf.get_variable('bias', | |
shape=[input_size], | |
initializer=bias_initializer) | |
mean, variance = tf.nn.moments(tensor, axes=[1], keep_dims=True) | |
z_shift = (tensor - mean) / tf.sqrt(variance + epsilon) | |
return gain * z_shift + bias | |
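# layer_normalize standardizes every row of a 2-D tensor and then applies a
# learned per-feature gain and bias. A rough sketch of the same computation for
# a single row x (illustrative only, not part of the training code):
#
#   mean = x.mean()
#   variance = x.var()
#   y = gain * (x - mean) / np.sqrt(variance + epsilon) + bias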
class LSTMCell(tf.contrib.rnn.RNNCell): | |
def __init__(self, | |
state_size, | |
forget_bias=1.0, | |
activation=tf.tanh, | |
final_activation=tf.tanh, | |
layer_normalize=False, | |
reuse=None): | |
self._state_size = state_size | |
self._forget_bias = forget_bias | |
self._activation = activation or (lambda z: z) | |
self._final_activation = final_activation or (lambda z: z) | |
self._reuse = reuse | |
self._layer_normalize = layer_normalize | |
if layer_normalize: | |
print('Using layer normalization') | |
@property | |
def state_size(self): | |
return tf.contrib.rnn.LSTMStateTuple(self._state_size, self._state_size) | |
@property | |
def output_size(self): | |
return self._state_size | |
def perform_layer_normalization(self, values): | |
scopes = ['input_gate', 'output_gate', 'forget_gate', 'input'] | |
return [layer_normalize(i, scopes[n]) for n, i in enumerate(values)] | |
def __call__(self, inputs, state, scope=None): | |
with tf.variable_scope(scope or type(self).__name__, reuse=self._reuse): | |
cell_state, hidden_state = state | |
concatenated = tf.concat([inputs, hidden_state], axis=1) | |
values = dense(concatenated, 4 * self._state_size) | |
values = tf.split(values, 4, axis=1) | |
if self._layer_normalize: | |
values = self.perform_layer_normalization(values) | |
input_gate = tf.sigmoid(values[0], 'i') | |
output_gate = tf.sigmoid(values[1], 'o') | |
forget_gate = tf.sigmoid(values[2] + self._forget_bias, 'f') | |
intermediate_input = self._activation(values[3]) | |
cell_state *= forget_gate | |
cell_state += input_gate * intermediate_input | |
output = output_gate * self._activation(cell_state) | |
new_state = tf.contrib.rnn.LSTMStateTuple(cell_state, output) | |
return output, new_state | |
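# A minimal usage sketch for the cells defined above, assuming the TF 1.x APIs
# already used in this file; the placeholder shape is illustrative, not taken
# from the training code:
#
#   cell = LSTMCell(state_size=32, layer_normalize=True)
#   inputs = tf.placeholder(tf.float32, [None, None, 4])  # batch x time x streams
#   outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
#
# dynamic_rnn calls the cell once per time step and threads the LSTMStateTuple
# (cell state, hidden state) through the sequence.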
def generate_single_epoch(data, sequence_length, number_of_batches): | |
batch_length = len(data) // number_of_batches | |
batches = [] | |
for batch_index in range(number_of_batches): | |
start = batch_length * batch_index | |
end = batch_length * (batch_index + 1) | |
batches.append(data[start:end]) | |
epoch_size = (batch_length - 1) // sequence_length | |
assert epoch_size > 0 | |
batches = np.array(batches, dtype=np.float32) | |
for i in range(epoch_size): | |
x = batches[:, i * sequence_length: (i + 1) * sequence_length] | |
y = batches[:, i * sequence_length + 1: (i + 1) * sequence_length + 1] | |
yield (x, y) | |
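# generate_single_epoch cuts the data into `number_of_batches` contiguous rows
# and yields (x, y) windows where y is x shifted one step ahead. A tiny worked
# example with scalar samples (values illustrative):
#
#   data = list(range(10)), number_of_batches = 2, sequence_length = 2
#   rows: [0, 1, 2, 3, 4] and [5, 6, 7, 8, 9]
#   yield 1: x = [[0, 1], [5, 6]], y = [[1, 2], [6, 7]]
#   yield 2: x = [[2, 3], [7, 8]], y = [[3, 4], [8, 9]]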
def generate_epochs(data, arguments): | |
if arguments.training_points: | |
data = data[:arguments.training_points] | |
for _ in range(arguments.epochs): | |
yield generate_single_epoch(data, | |
arguments.sequence_length, | |
arguments.batch_size) | |
def final_feedforward(y, hyper, number_of_streams): | |
with tf.variable_scope('final_feedforward'): | |
y = tf.reshape(y, [-1, int(y.shape[-1])]) | |
predictions = feedforward(inputs=y, | |
output_size=number_of_streams, | |
number_of_layers=hyper.final_layers, | |
units_per_layer=hyper.final_units, | |
hidden_activation=leaky_relu, | |
initializer=hyper.initializer, | |
regularizer=hyper.regularizer) | |
return predictions | |
def make_rnn(hyper, dropout): | |
status = 'Using {0} cells with a state size of {1}' | |
print(status.format(hyper.model, hyper.state_size)) | |
assert hyper.model in ['lstm', 'rnn', 'gru', 'scrappy'] | |
if hyper.model == 'lstm': | |
# if hyper.layer_normalize: | |
cell = LSTMCell(hyper.state_size, | |
layer_normalize=hyper.layer_normalize) | |
# # else: | |
# if hyper.use_peepholes: | |
# print('Using peephole connections in LSTM cells') | |
# cell = tf.contrib.rnn.LSTMCell(hyper.state_size, | |
# use_peepholes=hyper.use_peepholes, | |
# state_is_tuple=True, | |
# activation=lambda x: x) | |
elif hyper.model == 'rnn': | |
cell = RNNCell(hyper.state_size) | |
# cell = tf.contrib.rnn.BasicRNNCell(hyper.state_size) | |
elif hyper.model == 'gru': | |
cell = tf.contrib.rnn.GRUCell(hyper.state_size) | |
elif hyper.model == 'scrappy': | |
cell = ScrappyCell(hyper.state_size, | |
hyper.hidden_units, | |
hyper.hidden_layers) | |
if hyper.dropout < 1: | |
print('Using dropout with a rate of {0:.3f}'.format(1 - hyper.dropout)) | |
cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=dropout) | |
layers = [cell] * hyper.model_layers | |
rnn = tf.contrib.rnn.MultiRNNCell(layers) | |
rnn = tf.contrib.rnn.DropoutWrapper(rnn, output_keep_prob=dropout) | |
return rnn | |
def configure_learning_rate(hyper, global_step): | |
learning_rate = tf.constant(hyper.learning_rate) | |
if hyper.decay is not None: | |
input_size = hyper.batch_size * hyper.sequence_length | |
decay_steps = math.ceil(hyper.data_points / input_size) | |
status = 'Decaying learning rate from {0} by {1} every {2} steps' | |
print(status.format(hyper.learning_rate, hyper.decay, decay_steps)) | |
learning_rate = tf.train.exponential_decay(learning_rate, | |
decay_steps=decay_steps, | |
decay_rate=hyper.decay, | |
global_step=global_step, | |
staircase=True) | |
tf.summary.scalar('learning_rate', learning_rate) | |
return learning_rate | |
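# With staircase=True, tf.train.exponential_decay multiplies the base rate by
# decay_rate once every decay_steps optimizer steps:
#
#   lr(step) = learning_rate * decay_rate ** (step // decay_steps)
#
# e.g. (illustrative numbers) learning_rate=0.01, decay=0.99, decay_steps=50
# gives 0.01 for steps 0-49, 0.0099 for steps 50-99, and so on.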
def convolve(x, hyper): | |
if not hyper.convolution_filters: | |
return x | |
message = ('Convolving streams with filter size = {0}, ' | |
'stride = {1}, filters = {2}, padding = {3}') | |
print(message.format(hyper.convolution_size, | |
hyper.convolution_stride, | |
hyper.convolution_filters, | |
hyper.convolution_padding)) | |
with tf.variable_scope('convolution'): | |
shape = (hyper.convolution_size, # The width of the filter | |
int(x.shape[-1]), # The number of input channels to convolve | |
hyper.convolution_filters) # The number of output channels | |
filters = tf.get_variable('filters', | |
initializer=hyper.initializer, | |
regularizer=hyper.regularizer, | |
shape=shape) | |
biases = tf.get_variable('biases', | |
shape=[hyper.convolution_filters], | |
initializer=tf.constant_initializer(0.1)) | |
tf.summary.histogram('filters', filters) | |
tf.summary.histogram('biases', biases) | |
convolved = tf.nn.conv1d(value=x, | |
filters=filters, | |
stride=hyper.convolution_stride, | |
padding=hyper.convolution_padding.upper()) | |
result = tf.nn.relu(convolved + biases) | |
assert int(result.shape[-1]) == hyper.convolution_filters | |
return result | |
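# The 1-D convolution above slides `convolution_filters` filters of width
# `convolution_size` along the time axis of x (batch x time x streams). The
# resulting shapes, sketched with illustrative numbers:
#
#   x: [8, 100, 3], convolution_size=3, stride=1, filters=16
#   padding='SAME'  -> [8, 100, 16]  (for stride 1, the time length is preserved)
#   padding='VALID' -> [8, 98, 16]   (for stride 1, it shrinks by convolution_size - 1)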
def build_graph(hyper, number_of_streams): | |
start = time.time() | |
tf.reset_default_graph() | |
x_input = tf.placeholder(tf.float32, [None, None, number_of_streams]) | |
y_hat = tf.placeholder(tf.float32, [None, None, number_of_streams]) | |
x = convolve(x_input, hyper) | |
# offset = x[0, 0] | |
# x %= 1 | |
dropout = tf.Variable(hyper.dropout, trainable=False) | |
rnn = make_rnn(hyper, dropout) | |
batch_size = tf.shape(x_input)[0] | |
initial_state = rnn.zero_state(batch_size, dtype=tf.float32) | |
y, final_state = tf.nn.dynamic_rnn(rnn, x, initial_state=initial_state) | |
predictions = final_feedforward(y, hyper, number_of_streams) | |
# sequence_length = tf.shape(x_input)[1] | |
# unmod = offset + (tf.slice(np.arange(hyper.sequence_length, dtype=np.float32), [0], [sequence_length]) / 100) | |
# predictions += tf.reshape(unmod, tf.shape(predictions)) | |
labels = tf.reshape(y_hat, [-1, number_of_streams]) | |
positives = tf.abs(predictions - labels) < hyper.accuracy_epsilon | |
accuracy = tf.reduce_mean(tf.cast(positives, dtype=tf.float32)) | |
total_loss = tf.reduce_mean(tf.square(predictions - labels)) | |
tf.summary.scalar('accuracy', accuracy) | |
tf.summary.scalar('loss', total_loss) | |
global_step = tf.Variable(0, trainable=False) | |
learning_rate = configure_learning_rate(hyper, global_step) | |
optimizer = tf.train.AdamOptimizer(learning_rate) | |
train = optimizer.minimize(total_loss, global_step=global_step) | |
summaries = tf.summary.merge_all() | |
print('Compiled graph in: {0:.3f}s'.format(time.time() - start)) | |
return Graph(x_input, | |
y_hat, | |
initial_state, | |
final_state, | |
total_loss, | |
predictions, | |
accuracy, | |
summaries, | |
learning_rate, | |
dropout, | |
train) | |
def prediction_file(arguments, index): | |
if arguments.dry: | |
return open('/tmp/ignore', 'w') | |
file_name = 'epoch-{0}'.format(index) | |
path = os.path.join(arguments.workspace, 'predictions', file_name) | |
return open(path, 'w') | |
def plot_predictions(arguments, predictions, data): | |
if not arguments.plot and not arguments.plot_all: | |
return | |
real = list(zip(*data.samples)) | |
matrix = zip(list(zip(*predictions)), real, data.names) | |
predict_start = arguments.prediction_start + arguments.predict_ahead | |
predict_end = predict_start + len(predictions) | |
predict_range = list(range(predict_start, predict_end)) | |
for index, (prediction, real, name) in enumerate(matrix, start=1): | |
print("Plotting result for '{0}'...".format(name)) | |
handles = [] | |
plot.figure(index) | |
plot.title(arguments.workspace) | |
plot.subplot(111) | |
label = '{0}-predicted'.format(name) | |
handles += plot.plot(predict_range, prediction, 'r--', label=label) | |
plot.subplot(111) | |
handles += plot.plot(real, 'g-', label='{0}-real'.format(name)) | |
if arguments.training_points: | |
plot.axvline(arguments.training_points, linestyle='-.') | |
if arguments.prediction_start: | |
plot.axvline(arguments.prediction_start, linestyle=':') | |
plot.legend(loc='upper center', | |
handles=handles, | |
bbox_to_anchor=(0.95, 1), | |
fontsize='large') | |
plot.show() | |
def process_prediction(prediction, data, index, mean_error, output): | |
data = data.samples | |
zero = np.zeros_like(data[0]) | |
error = np.abs(prediction - data[index]) if index < len(data) else zero | |
mean_error += (error - mean_error) / index | |
status = '{0} {1} {2} {3}\n' | |
label = data[index] if index < len(data) else zero | |
error_string = ', '.join(map(str, mean_error)) | |
prediction = ', '.join(map(str, prediction)) | |
label = ', '.join(map(str, label)) | |
status = status.format(index, prediction, label, error_string) | |
if output.name != '/tmp/ignore': | |
output.write(status) | |
return mean_error | |
def train_online(graph, experience): | |
print('Training online (batch of {0}) ...'.format(len(experience))) | |
session = tf.get_default_session() | |
x, y = experience[:-1], experience[1:] | |
feed_dict = {graph.x: [x], graph.y: [y]} | |
session.run(graph.train, feed_dict) | |
experience.clear() | |
def make_example(arguments, data, sequence_length): | |
shape = (arguments.batch_size, sequence_length, data.number_of_streams) | |
return np.zeros(shape=shape, dtype=np.float32) | |
def roll_forward(graph, arguments, data, offset): | |
# print('\nRolling {0} steps forward'.format(arguments.predict_ahead - 1)) | |
example = make_example(arguments, data, arguments.predict_ahead - 1) | |
example[0] = data.samples[offset:offset + arguments.predict_ahead - 1] | |
session = tf.get_default_session() | |
feed_dict = {graph.x: example, graph.dropout: 1.0} | |
fetches = [graph.predictions, graph.final_state] | |
prediction, state = session.run(fetches, feed_dict) | |
return state, [prediction[-1:]] | |
def generate_samples(graph, arguments, data, epoch_index): | |
offset = arguments.prediction_start | |
session = tf.get_default_session() | |
state = None | |
if arguments.predict_ahead > 0: | |
state, generated = roll_forward(graph, arguments, data, offset) | |
else: | |
generated = [data.samples[offset]] | |
example = make_example(arguments, data, 1) | |
error = 0 | |
experience = [] | |
with prediction_file(arguments, epoch_index) as output: | |
for index in range(1 + offset, 1 + offset + arguments.predict): | |
if arguments.train_online and index < len(data.samples): | |
experience.append(data.samples[index]) | |
if len(experience) == arguments.online_batch_size: | |
train_online(graph, experience) | |
index += arguments.predict_ahead | |
if arguments.help_prediction and index < len(data.samples): | |
example[0, 0] = data.samples[index] | |
else: | |
example[0, 0] = generated[-1] | |
feed_dict = {graph.x: example, graph.dropout: 1.0} | |
if state is not None: | |
feed_dict[graph.initial_state] = state | |
fetches = [graph.predictions, graph.final_state] | |
prediction, state = session.run(fetches, feed_dict) | |
prediction = prediction[0] | |
assert len(prediction) == data.number_of_streams | |
generated.append(prediction) | |
error = process_prediction(prediction, data, index, error, output) | |
error_string = ', '.join( | |
['{0} = {1:.5f}'.format(k, v) for k, v in zip(data.names, error)]) | |
print(' | Prediction Error: {0}'.format(error_string)) | |
if epoch_index == 'final': | |
plot_predictions(arguments, generated, data) | |
return error | |
def training_step(graph, x, y, state, write_summary): | |
session = tf.get_default_session() | |
feed_dict = {graph.x: x, graph.y: y} | |
if state is not None: | |
feed_dict[graph.initial_state] = state | |
fetches = [graph.loss, | |
graph.accuracy, | |
graph.final_state, | |
graph.summaries, | |
graph.learning_rate, | |
graph.train] | |
values = session.run(fetches, feed_dict) | |
loss, accuracy, state, summaries, learning_rate = values[:-1] | |
write_summary(summaries) | |
return loss, accuracy, state, learning_rate | |
def make_summary_writer(arguments, session): | |
if not arguments.tensorboard or arguments.dry: | |
return lambda _: _ | |
path = os.path.join(arguments.workspace, 'tensorboard') | |
summary_writer = tf.summary.FileWriter(path, graph=session.graph) | |
global_step = 0 | |
print('Writing TensorBoard logs to {0}'.format(path)) | |
def write_summary(summary): | |
nonlocal global_step | |
summary_writer.add_summary(summary, global_step=global_step) | |
global_step += 1 | |
return write_summary | |
def print_status(variables): | |
status = ('\r Epoch: {:<4} | Iteration: {:<4} | ' | |
'Loss: {:.10f} | Accuracy: {:.8f} | ' | |
'Learning Rate: {:.8f}') | |
status = status.format(variables['epoch_index'], | |
variables['iteration'], | |
variables['loss'], | |
variables['accuracy'], | |
variables['learning_rate']) | |
print(status, end='') | |
def _train_graph(data, graph, arguments, save): | |
losses, accuracies, errors = [], [], [] | |
with tf.Session() as session: | |
tf.global_variables_initializer().run() | |
write_summary = make_summary_writer(arguments, session) | |
epoch_generator = generate_epochs(data.samples, arguments) | |
for epoch_index, epoch in enumerate(epoch_generator, start=1): | |
do_checkpoint = arguments.checkpoint and epoch_index > 1 | |
if do_checkpoint and epoch_index % arguments.checkpoint == 0: | |
error = generate_samples(graph, arguments, data, epoch_index) | |
errors.append(error) | |
stop = False | |
state = None | |
for iteration, (x, y) in enumerate(epoch, start=1): | |
try: | |
values = training_step(graph, x, y, state, write_summary) | |
loss, accuracy, state, learning_rate = values | |
print_status(locals()) | |
losses.append(loss) | |
accuracies.append(accuracy) | |
except KeyboardInterrupt: | |
if stop: break | |
stop = True | |
save(session) | |
if stop: break | |
error = generate_samples(graph, arguments, data, 'final') | |
errors.append(error) | |
return losses, accuracies, errors | |
def make_checkpoint(arguments): | |
file_name = 'checkpoint-{0}'.format(time.strftime('%H-%M-%S')) | |
checkpoint = os.path.join(arguments.workspace, 'checkpoints', file_name) | |
print("Checkpoint is: '{0}'".format(checkpoint)) | |
return checkpoint | |
def train_graph(graph, data, arguments): | |
start = time.time() | |
saver = tf.train.Saver() | |
checkpoint = make_checkpoint(arguments) | |
if arguments.dry: | |
save = lambda _: _ | |
else: | |
save = lambda session: saver.save(session, checkpoint) | |
metrics = _train_graph(data, graph, arguments, save) | |
print('\nTraining time: {0:.3f} s'.format(time.time() - start)) | |
return metrics | |
def parse_arguments(command_line_arguments): | |
parser = argparse.ArgumentParser(description='RNN benchmark tool') | |
parser.add_argument('-e', | |
'--epochs', | |
type=int, | |
default=1000, | |
help='Number of epochs to run') | |
parser.add_argument('-l', | |
'--sequence-length', | |
type=int, | |
default=100, | |
help='Length of the sequence in each batch') | |
parser.add_argument('-b', | |
'--batch-size', | |
type=int, | |
default=1, | |
help='The batch size to vectorize training') | |
parser.add_argument('-s', | |
'--state-size', | |
type=int, | |
default=128, | |
help='The dimension of the state') | |
parser.add_argument('-o', | |
'--model-layers', | |
type=int, | |
default=1, | |
help='The number of (overall) layers of the model') | |
parser.add_argument('--hidden-layers', | |
type=int, | |
default=1, | |
help='The number of hidden layers inside a ' | |
'(deep) RNN or scrappy cell') | |
parser.add_argument('--hidden-units', | |
type=int, | |
default=128, | |
help='The number of hidden units inside a ' | |
'(deep) RNN or scrappy cell') | |
parser.add_argument('--final-layers', | |
type=int, | |
default=1, | |
                        help='The number of hidden layers used to process '
                             'the output of the model')
parser.add_argument('--final-units', | |
type=int, | |
default=128, | |
help='The number of hidden units used to process the ' | |
'output of the model, in each layer') | |
parser.add_argument('--training-points', | |
type=int, | |
default=0, | |
                        help='The number of data points to train on (0 = use all)')
parser.add_argument('-r', | |
'--learning-rate', | |
type=float, | |
default=1e-4, | |
help='The learning rate to use') | |
parser.add_argument('--decay', | |
type=float, | |
help='The learning rate decay rate per epoch') | |
parser.add_argument('--accuracy-epsilon', | |
type=float, | |
default=0.01, | |
                        help='The tolerance within which a prediction '
                             'counts as a true positive')
parser.add_argument('-c', | |
'--checkpoint', | |
type=int, | |
default=100, | |
                        help='How many epochs between sample generations. '
                             'Set to 0 to only generate samples at the end.')
parser.add_argument('--workspace', | |
default=None, | |
                        help='The name of the workspace for the run. '
'If not supplied, an appropriate one will ' | |
'be generated') | |
parser.add_argument('-d', | |
'--data-points', | |
type=int, | |
help='How much data to generate as input') | |
parser.add_argument('--prediction-start', | |
type=int, | |
default=0, | |
help='At what point in the data to start predicting') | |
parser.add_argument('-p', | |
'--predict', | |
type=int, | |
default=10000, | |
help='How many data points to predict ' | |
'on every checkpoint') | |
parser.add_argument('--dropout', | |
type=float, | |
default=0.0, | |
help='The dropout probability') | |
parser.add_argument('--layer-normalize', | |
action='store_true', | |
help='Whether to use layer normalization') | |
parser.add_argument('--initializer', | |
choices=INITIALIZERS.keys(), | |
                        default='orthogonal',
help='The initializer to use for weights') | |
parser.add_argument('--regularizer', | |
choices=REGULARIZERS.keys(), | |
help='The regularizer to use for weights') | |
parser.add_argument('--use-peepholes', | |
action='store_true', | |
help='Whether to use peepholes') | |
parser.add_argument('--convolution-filters', | |
type=int, | |
help='If set, will apply 1-D convolutions to the ' | |
'signal with the given number of filters.') | |
parser.add_argument('--convolution-size', | |
type=int, | |
default=3, | |
help='If --convolution-filters is set, uses this ' | |
'filter size for the convolution') | |
parser.add_argument('--convolution-stride', | |
type=int, | |
default=1, | |
help='If --convolution-filters is set, uses this ' | |
'stride for the convolution') | |
parser.add_argument('--convolution-padding', | |
type=str, | |
choices=('valid', 'same'), | |
default='same', | |
help='If --convolution-filters is set, uses this ' | |
'padding technique for the convolution') | |
parser.add_argument('--predict-ahead', | |
type=int, | |
default=0, | |
help='The offset into the future for prediction') | |
parser.add_argument('--train-online', | |
action='store_true', | |
help='Whether to do online training during prediction') | |
parser.add_argument('--online-batch-size', | |
type=int, | |
help='How many samples to collect online before ' | |
'performing a single training step') | |
parser.add_argument('--z-shift', | |
action='store_true', | |
help='Whether to z-shift the data') | |
parser.add_argument('--help-prediction', | |
action='store_true', | |
help='Whether to feed in ground truth ' | |
'instead of predictions during prediction') | |
parser.add_argument('--plot', | |
action='store_true', | |
                        help='Whether to plot the predictions')
parser.add_argument('--plot-all', | |
action='store_true', | |
                        help='Whether to plot everything')
parser.add_argument('--tensorboard', | |
action='store_true', | |
                        help='Whether to enable TensorBoard')
parser.add_argument('--dry', | |
action='store_true', | |
help='Dry run (does not store anything to disk)') | |
parser.add_argument('--from-files', | |
action='store_true', | |
help='Assume the streams are files to load, not ' | |
'names of streams to generate internally') | |
parser.add_argument('model', | |
choices=['lstm', 'gru', 'rnn', 'scrappy'], | |
help='The kind of model to run') | |
parser.add_argument('streams', | |
nargs='+', | |
help='The kind of streams to generate') | |
arguments = parser.parse_args(command_line_arguments) | |
if not arguments.from_files: | |
if not all(s in DATA_STREAMS.keys() for s in arguments.streams): | |
streams = ', '.join(DATA_STREAMS.keys()) | |
raise KeyError('Streams must be one of {{{}}}'.format(streams)) | |
if arguments.workspace is None: | |
parts = [arguments.model, | |
'-'.join(arguments.streams), | |
arguments.model_layers, | |
arguments.state_size, | |
arguments.hidden_layers, | |
arguments.hidden_units, | |
arguments.final_layers, | |
arguments.final_units, | |
datetime.datetime.now().isoformat()] | |
arguments.workspace = '-'.join(map(str, parts)) | |
arguments.dropout = 1.0 - arguments.dropout | |
if arguments.initializer: | |
print('Using {0} initialization'.format(arguments.initializer)) | |
arguments.initializer = INITIALIZERS.get(arguments.initializer) | |
if arguments.regularizer: | |
print('Using {0} regularization'.format(arguments.regularizer)) | |
arguments.regularizer = REGULARIZERS[arguments.regularizer] | |
if arguments.train_online and not arguments.online_batch_size: | |
message = 'Setting online batch size to sequence length ({0})' | |
print(message.format(arguments.sequence_length)) | |
arguments.online_batch_size = arguments.sequence_length | |
return arguments | |
def generate_data(streams, data_points): | |
assert data_points is not None | |
message = "Generating {0} data points each with streams: {1}" | |
print(message.format(data_points, ', '.join(streams))) | |
data = [] | |
for t in range(data_points): | |
data.append([DATA_STREAMS[stream](t) for stream in streams]) | |
return data | |
def read_streams_from_disk(streams, data_points): | |
data = [] | |
for stream in streams: | |
with open(stream, 'r') as source: | |
stream_data = [] | |
for count, sample in enumerate(source): | |
if count == data_points: | |
break | |
if sample and not sample.isspace(): | |
stream_data.append(float(sample)) | |
message = "Loaded {0} data points from file '{1}'" | |
print(message.format(len(stream_data), stream)) | |
data.append(stream_data) | |
return list(zip(*data)) | |
def z_shift(data): | |
data = np.array(data) | |
mean = np.mean(data, axis=0) | |
print('Data has mean {0}'.format(', '.join(map(str, mean)))) | |
variance = np.mean(np.square(data), axis=0) - np.square(mean) | |
stddev = np.sqrt(variance) | |
print('Data has stddev {0}'.format(', '.join(map(str, stddev)))) | |
z = (data - mean) / stddev | |
    assert np.all(np.abs(np.mean(z, axis=0)) < 1e-9)
return z.tolist() | |
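# z_shift standardizes each stream independently, z = (x - mean) / stddev, so
# every column of the result has (approximately) zero mean and unit variance.
# A quick sanity check one could run (illustrative only):
#
#   z = np.array(z_shift([[1.0], [2.0], [3.0]]))
#   np.allclose(z.mean(axis=0), 0.0)  # True
#   np.allclose(z.std(axis=0), 1.0)   # True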
def load_data(arguments): | |
streams, data_points = arguments.streams, arguments.data_points | |
if arguments.from_files: | |
data = read_streams_from_disk(streams, data_points) | |
else: | |
data = generate_data(streams, data_points) | |
if arguments.z_shift: | |
print('Z-shifting data') | |
data = z_shift(data) | |
if data_points is None: | |
arguments.data_points = len(data) | |
return Data(data, number_of_streams=len(streams), names=streams) | |
def setup_workspace(arguments): | |
if arguments.dry: | |
return | |
inner_most = os.path.join(arguments.workspace, 'checkpoints') | |
os.makedirs(inner_most, exist_ok=True) | |
inner_most = os.path.join(arguments.workspace, 'predictions') | |
os.makedirs(inner_most, exist_ok=True) | |
print('Created directory {0}'.format(arguments.workspace)) | |
def write_metrics(arguments, loss, accuracy): | |
if arguments.dry: | |
return | |
path = os.path.join(arguments.workspace, 'metrics.csv') | |
print('Writing metrics to {0} ...'.format(path)) | |
with open(path, 'w') as output: | |
for l, a in zip(loss, accuracy): | |
output.write('{0} {1}\n'.format(l, a)) | |
def save_results(arguments, metrics): | |
loss, accuracy, error = metrics | |
if not arguments.dry: | |
write_metrics(arguments, loss, accuracy) | |
if not arguments.plot_all: | |
return | |
plot.figure(1) | |
plot.subplot(111) | |
loss_plot, = plot.plot(loss, 'r-', label='loss') | |
plot.subplot(111) | |
accuracy_plot, = plot.plot(accuracy, 'b-', label='accuracy') | |
plot.title(arguments.workspace + ' (loss/accuracy)') | |
plot.legend(loc='upper center', | |
handles=[loss_plot, accuracy_plot], | |
bbox_to_anchor=(0.95, 1), | |
fontsize='x-large') | |
plot.figure(2) | |
plot.title('Mean error over time') | |
plot.plot(error, 'r-') | |
plot.show() | |
def main(): | |
''' | |
python rnn_tester.py --learning-rate=0.01 --decay=0.99 --epochs=1000 --data-points=5000 --predict=10000 --batch-size=8 --model-layers=3 --state-size=32 --final-layers=2 --final-units=128 --plot --dry --from-files lstm sine.csv | |
''' | |
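    # An example run with internally generated streams instead of files
    # (all flags and stream names are defined above; numbers are illustrative):
    #
    #   python rnn_tester.py --epochs=200 --data-points=5000 --predict=2000 \
    #       --state-size=64 --plot --dry lstm sine cosine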
arguments = parse_arguments(sys.argv[1:]) | |
print(arguments) | |
setup_workspace(arguments) | |
data = load_data(arguments) | |
graph = build_graph(arguments, data.number_of_streams) | |
metrics = train_graph(graph, data, arguments) | |
save_results(arguments, metrics) | |
if __name__ == '__main__': | |
main() |