Minimal Keras examples for various purposes
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Create a Keras embedding layer with an initial one-hot encoding by using the identity initializer
import tensorflow as tf
import numpy as np

# Input sequence of five words drawn from a vocabulary of four unique words.
# Let's pretend this is "hello world hello everyone else",
# where hello is mapped to 1, world = 0, everyone = 2, else = 3.
a = np.array([[1, 0, 1, 2, 3]])

# Since the sequence consists of five words, the input layer has a shape of 5.
inp = tf.keras.layers.Input(shape=(5,))

# There are four unique words, so the input dimension is 4.
# We want the embedding to be one-hot, so the output dimension is _also_ 4.
# The length of the input is 5, as explained above.
# The "identity" initializer makes the embedding matrix the identity matrix,
# so word i is mapped to a vector with a 1 at position i and 0's everywhere else.
emb = tf.keras.layers.Embedding(input_dim=4, output_dim=4, input_length=5, embeddings_initializer='identity')(inp)

# The model just puts things together.
model = tf.keras.models.Model(inputs=[inp], outputs=[emb])

# A model prediction here just runs the input through the embedding layer
# with the initial (identity) weights.
print(model.predict(a))
# Prints:
# [[[0. 1. 0. 0.]    # hello
#   [1. 0. 0. 0.]    # world
#   [0. 1. 0. 0.]    # hello
#   [0. 0. 1. 0.]    # everyone
#   [0. 0. 0. 1.]]]  # else
# One-hot encoded using an embedding layer!
# Note that this one-hot encoding does not stay intact during training,
# but in practice the numbers stay close to 0 and 1 respectively.
# Also, if you _need_ true one-hot encoding, the embedding layer is probably not always appropriate.
# Good discussion: https://github.com/keras-team/keras/issues/4838#issuecomment-269138502
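
For comparison, here is a minimal sketch of true one-hot encoding done as plain data preprocessing instead of a trainable layer, using the same toy sequence as above; tf.keras.utils.to_categorical produces fixed vectors that cannot drift during training.

# Hedged sketch: fixed one-hot encoding without an embedding layer.
import tensorflow as tf
import numpy as np

a = np.array([[1, 0, 1, 2, 3]])
# Each integer becomes a fixed one-hot row; there are no weights, so nothing changes during training.
one_hot = tf.keras.utils.to_categorical(a, num_classes=4)
print(one_hot)
# Prints the same [[[0. 1. 0. 0.], [1. 0. 0. 0.], ...]] matrix as the embedding example above.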
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Sequence-to-sequence prediction using an encoder-decoder network.
# Inspired by https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
import tensorflow as tf
import numpy as np

# Overfit on the same sentence for illustration purposes.
# Interestingly, it seems to require more than one training sample for this to work.
data = [
    'All work and no play makes Jack a dull boy',
    'All work and no play makes Jack a dull boy',
    'All work and no play makes Jack a dull boy',
    'All work and no play makes Jack a dull boy'
]

# Map texts to integer sequences.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(data)
sequences = np.array(tokenizer.texts_to_sequences(data))

# The targets are reshaped to 3D (batch, timesteps, 1) so they line up with the
# per-timestep softmax output when using sparse_categorical_crossentropy.
sequence_out = sequences.reshape((sequences.shape[0], sequences.shape[1], 1))

# The number of tokens is the number of words + 1 since index 0 is not used by the tokenizer.
num_tokens = len(tokenizer.word_index) + 1

# Encoder with embedding layer. The output dimension of 8 is arbitrary.
inp = tf.keras.layers.Input(shape=(None,))
x = tf.keras.layers.Embedding(input_dim=num_tokens, output_dim=8)(inp)
_, state = tf.keras.layers.GRU(32, return_state=True)(x)

# Decoder
# The decoder starts from the encoder's final state (initial_state) and returns one
# output per timestep (return_sequences) for the Dense layer; see the shape check after this file.
inp2 = tf.keras.layers.Input(shape=(None,))
x = tf.keras.layers.Embedding(input_dim=num_tokens, output_dim=8)(inp2)
x = tf.keras.layers.GRU(32, return_sequences=True)(x, initial_state=state)
outp = tf.keras.layers.Dense(num_tokens, activation='softmax')(x)

# Creating and compiling is straightforward.
model = tf.keras.models.Model(inputs=[inp, inp2], outputs=[outp])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy']
)
model.summary()

# Usually achieves 100% accuracy after about 20 epochs.
model.fit([sequences, sequences], sequence_out, batch_size=1, epochs=20, callbacks=[
    tf.keras.callbacks.TerminateOnNaN(),
    tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2),
])

rev = {v: k for k, v in tokenizer.word_index.items()}
print(rev)

def print_prediction(sequences):
    p = model.predict([sequences, sequences])
    # p has shape (batch, timesteps, num_tokens); take the most likely token per timestep.
    for i, sentence in enumerate(np.argmax(p, axis=-1)):
        print(i, sentence)
        print(' '.join(rev.get(int(word), 'N/A') for word in sentence))
    print()

# This correctly prints the original sentences.
print_prediction(sequences)

# This usually also prints the original sentence, even though the same word index (8)
# is fed for every position.
print_prediction([[8, 8, 8, 8, 8, 8, 8, 8, 8, 8]])
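
To make return_state and return_sequences in the decoder above less mysterious, here is a small shape check, assuming a tf.keras version where layers can be called eagerly on NumPy arrays.

# Hedged sketch: inspect what a GRU returns with return_state / return_sequences.
import tensorflow as tf
import numpy as np

demo_input = np.random.random((2, 5, 8)).astype('float32')  # (batch, timesteps, features)

# return_state=True: the layer returns the last output plus the final hidden state.
out, state = tf.keras.layers.GRU(32, return_state=True)(demo_input)
print(out.shape, state.shape)  # (2, 32) (2, 32)

# return_sequences=True: one output per timestep, which is what the decoder's Dense layer consumes.
seq = tf.keras.layers.GRU(32, return_sequences=True)(demo_input)
print(seq.shape)  # (2, 5, 32)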
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Word prediction using a recurrent neural network
import numpy as np
import tensorflow as tf

# Overfit on the same sentence for illustration purposes.
data = [
    'All work and no play makes Jack a dull boy',
]

# Map texts to integer sequences.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(data)
sequences = np.array(tokenizer.texts_to_sequences(data))

# Create input-output pairs: two input words, one output word.
seq_length = 2

# The number of words is the number of unique words + 1 since index 0 is not used by the tokenizer.
num_words = len(tokenizer.word_index) + 1

X, y = [], []
for sequence in sequences:
    for i in range(seq_length, len(sequence)):
        X.append(sequence[i-seq_length:i])
        y.append(sequence[i])

# Make categorical (one-hot) outputs to please the network.
# It's also possible to _not_ do this and use sparse_categorical_crossentropy as the loss,
# which skips the one-hot targets and can use less memory.
y = tf.keras.utils.to_categorical(y, num_classes=num_words)

# Embedding -> GRU -> Dense -> Dense
inp = tf.keras.layers.Input(shape=(seq_length,))
x = tf.keras.layers.Embedding(input_dim=num_words, output_dim=5, input_length=seq_length)(inp)
x = tf.keras.layers.GRU(16)(x)
x = tf.keras.layers.Dense(16)(x)
outp = tf.keras.layers.Dense(num_words, activation='softmax')(x)

model = tf.keras.models.Model(inputs=[inp], outputs=[outp])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy']
)
model.summary()

# This fit gives widely different results, sometimes reaching
# less than 50% accuracy after 50 epochs and other times reaching 100% accuracy.
model.fit(np.array(X), np.array(y), batch_size=1, epochs=50, callbacks=[
    tf.keras.callbacks.TerminateOnNaN(),
    tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3),
])

rev = {v: k for k, v in tokenizer.word_index.items()}
print(rev)

def print_prediction(sequences):
    p = model.predict(sequences)
    words = np.argmax(p, axis=1)
    for i, word in enumerate(words):
        seq = ' '.join(rev.get(int(w), 'N/A') for w in sequences[i])
        print(i, 'Input:', '"{}"'.format(seq), 'Next word:', '"{}"'.format(rev.get(int(word), 'N/A')))
    print()

# Should hopefully print the correct next word for each of these two-word inputs,
# i.e. "and", "makes", "dull", "boy".
print_prediction(np.array(tokenizer.texts_to_sequences(['All work', 'no play', 'jack a', 'a dull'])))
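
As a follow-up, the trained next-word model can also generate a continuation word by word by repeatedly feeding the last two words back in. This is a sketch, not part of the original gist; continue_text is a hypothetical helper and assumes the seed only contains words the tokenizer has seen.

# Hedged sketch: continue a seed phrase with the trained next-word model.
# continue_text is a hypothetical helper; it reuses model, tokenizer, rev and seq_length from above.
def continue_text(seed, n_words=8):
    words = seed.lower().split()
    for _ in range(n_words):
        # Look up the last seq_length words and predict the most likely next word.
        seq = [tokenizer.word_index[w] for w in words[-seq_length:]]
        p = model.predict(np.array([seq]))
        idx = int(np.argmax(p, axis=-1)[0])
        if idx not in rev:  # index 0 is unused padding; stop if it ever wins
            break
        words.append(rev[idx])
    return ' '.join(words)

# With a well-fit model this should reproduce the training sentence.
print(continue_text('all work'))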
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Use the Keras functional API to train the XOR function
import keras.utils
from keras.models import Model
from keras.layers import Input, Dense
import numpy as np

# Define the functional layers
inputs = Input(shape=(2,))
dense = Dense(32, activation='relu')(inputs)
predictions = Dense(2, activation='softmax')(dense)

# Compile the model
model = Model(inputs=inputs, outputs=predictions)
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Generate XOR training data
x_train = np.random.randint(2, size=(1000, 2))
y_train_raw = np.logical_xor(x_train[:, 0], x_train[:, 1]).reshape(1000, 1)
y_train = keras.utils.to_categorical(y_train_raw, num_classes=2)
x_test = np.random.randint(2, size=(10, 2))
y_test_raw = np.logical_xor(x_test[:, 0], x_test[:, 1]).reshape(10, 1)
y_test = keras.utils.to_categorical(y_test_raw, num_classes=2)

# Train the model, iterating on the data in batches of 32 samples
model.fit(x_train, y_train, epochs=10, batch_size=32)

# Test the model
print('Evaluation score: ', model.evaluate(x_test, y_test))
print('Input data: ', x_test)
print('Predictions: ', model.predict(x_test))
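
The raw softmax rows can be hard to eyeball, so here is a small follow-up sketch (reusing model and x_test from above) that turns the predictions into 0/1 labels and compares them with the true XOR values.

# Hedged sketch: compare predicted class labels with the true XOR of each test pair.
predicted = np.argmax(model.predict(x_test), axis=1)
expected = np.logical_xor(x_test[:, 0], x_test[:, 1]).astype(int)
for pair, pred, exp in zip(x_test, predicted, expected):
    print(pair, 'predicted:', pred, 'expected:', exp)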
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Use the Keras sequential model to train the XOR function
import keras.utils
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

model = Sequential()
model.add(Dense(32, activation='relu', input_dim=2))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Generate XOR training data
x_train = np.random.randint(2, size=(1000, 2))
y_train_raw = np.logical_xor(x_train[:, 0], x_train[:, 1]).reshape(1000, 1)
y_train = keras.utils.to_categorical(y_train_raw, num_classes=2)
x_test = np.random.randint(2, size=(10, 2))
y_test_raw = np.logical_xor(x_test[:, 0], x_test[:, 1]).reshape(10, 1)
y_test = keras.utils.to_categorical(y_test_raw, num_classes=2)

# Train the model, iterating on the data in batches of 32 samples
model.fit(x_train, y_train, epochs=10, batch_size=32)

# Test the model
print('Evaluation score: ', model.evaluate(x_test, y_test))
print('Input data: ', x_test)
print('Predictions: ', model.predict(x_test))
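
Since XOR is a binary problem, the two-unit softmax plus categorical_crossentropy setup could also be written with a single sigmoid output and binary_crossentropy. Below is a standalone sketch of that variant, not part of the original gist; model_bin is just an illustrative name.

# Hedged sketch: XOR with a single sigmoid output and binary_crossentropy instead of softmax.
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

model_bin = Sequential()
model_bin.add(Dense(32, activation='relu', input_dim=2))
model_bin.add(Dense(1, activation='sigmoid'))  # probability that XOR of the two inputs is 1
model_bin.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

x_bin = np.random.randint(2, size=(1000, 2))
y_bin = np.logical_xor(x_bin[:, 0], x_bin[:, 1]).astype(int)  # plain 0/1 labels, no to_categorical needed
model_bin.fit(x_bin, y_bin, epochs=10, batch_size=32)

# Probabilities should end up close to 0 or 1 for the four possible input pairs.
print(model_bin.predict(np.array([[0, 0], [0, 1], [1, 0], [1, 1]])))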