Simple example of a stateful Keras LSTM with an embedding layer.
""" | |
Learning Task: | |
Given a sequence, predict a label based on the first value of the sequence | |
Explanation of stateful LSTM and setup: | |
http://philipperemy.github.io/keras-stateful-lstm/ | |
Exmple: | |
given a sequence [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], predict 1 | |
given a sequence [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], predict 0 | |
""" | |
import numpy as np
from numpy.random import choice                  # for generating training data
from keras.models import Sequential              # model used
from keras.layers import Dense, Embedding, LSTM  # layers used
from keras.utils import generic_utils            # show training progress
""" | |
Hyper parameters | |
""" | |
number_examples = 1000 # number of trainings examples | |
sequence_length = 12 # length of the time series sequence of the prediction task | |
batch_size = 2 # how many sequence to process in parallel | |
time_steps = 3 # lstm length, number of cells, etc. | |
input_dim = 1 # number of different values | |
embedding_size = 5 # size of embedding vector | |
num_epochs = 3 # number to loop over trainings data | |
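
# Sanity checks (added for clarity; not in the original gist): the chunked,
# stateful training loop below assumes these divisibility constraints.
assert sequence_length % time_steps == 0  # each sequence splits evenly into chunks
assert number_examples % batch_size == 0  # every batch is full, as a stateful LSTM requires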
"""""" | |
Learning Task: | |
Given a sequence, predict a label based on the first value of the sequence | |
Explanation of stateful LSTM and setup: | |
http://philipperemy.github.io/keras-stateful-lstm/ | |
Exmple: | |
given a sequence [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], predict 1 | |
given a sequence [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], predict 0 | |
""" | |
Generate trainings data | |
""" | |
X = np.zeros(shape=(number_examples, sequence_length))
one_indexes = choice(a=number_examples, size=number_examples // 2, replace=False)
X[one_indexes, 0] = 1  # half the sequences start with a 1, the rest with a 0
train_x = X        # training data (x)
train_y = X[:, 0]  # prediction targets (y): the first value of each sequence
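
# Quick sanity check (illustrative; not in the original gist): exactly half of
# the sequences start with a 1, so the labels are perfectly balanced.
assert train_x.shape == (number_examples, sequence_length)
assert train_y.mean() == 0.5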
""" | |
Define keras model | |
""" | |
model = Sequential()
model.add(Embedding(
    input_dim=input_dim,
    output_dim=embedding_size,
    input_length=time_steps,
    batch_input_shape=(batch_size, time_steps)
))  # output shape: (batch_size, time_steps, embedding_size)
model.add(LSTM(10, batch_input_shape=(batch_size, time_steps, embedding_size), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
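
# Optional (not in the original gist): inspect layer output shapes. Because the
# model is stateful with a fixed batch_input_shape, the summary reports
# batch_size as the first dimension instead of None.
# model.summary()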
""" | |
Trainingsloop | |
""" | |
for epoch in range(num_epochs):
    training_accuracies = []
    training_losses = []
    progbar = generic_utils.Progbar(number_examples // batch_size)
    for row_id in range(0, number_examples, batch_size):  # [0, 2, 4, ..., 998]
        for col_id in range(sequence_length // time_steps):  # [0, 1, 2, 3]
            # Get the next chunk of each sequence. Chunks must be fed in order
            # because the LSTM is stateful; see https://keras.io/layers/recurrent/
            # "If True, the last state for each sample at index i in a batch will be
            # used as initial state for the sample of index i in the following batch."
            batch_x = train_x[
                row_id:row_id + batch_size,
                col_id * time_steps:(col_id + 1) * time_steps
            ]  # shape: (batch_size, time_steps)
            batch_y = train_y[row_id:row_id + batch_size]  # shape: (batch_size,)
            # gradient update
            batch_loss, batch_accuracy = model.train_on_batch(batch_x, batch_y)
            # record metrics
            training_accuracies.append(batch_accuracy)
            training_losses.append(batch_loss)
        model.reset_states()  # reset states once each batch of sequences has been fully processed
        progbar.add(1, values=[("train loss", np.mean(training_losses)), ("acc", np.mean(training_accuracies))])
    print("Epoch %.2d: loss: %0.3f accuracy: %0.3f" % (epoch, np.mean(training_losses), np.mean(training_accuracies)))