@dlebech
Last active June 16, 2018 10:30
Minimal Keras examples for various purposes
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Create a Keras embedding layer with an initial one-hot encoding by using identity initializer
import tensorflow as tf
import numpy as np
# Input sequence of five words drawn from a vocabulary of four unique words
# Let's pretend this is "hello world hello everyone else",
# where hello is mapped to 1, world to 0, everyone to 2 and else to 3
a = np.array([[1, 0, 1, 2, 3]])
# Since the vector consists of five words, the input layer has a shape of 5
inp = tf.keras.layers.Input(shape=(5,))
# There are four unique words so the input dimension is 4
# We want the embedding to be one-hot, so the output dimension is _also_ 4
# The length of the input is 5, as explained above
# Using the "identity" initializer will put 1's where the word appears and 0's elsewhere
emb = tf.keras.layers.Embedding(input_dim=4, output_dim=4, input_length=5, embeddings_initializer='identity')(inp)
# The model just puts things together.
model = tf.keras.models.Model(inputs=[inp], outputs=[emb])
# A model prediction here is just running the input through the embedding layer
# With the initial weights (which are identity)
print(model.predict(a))
# Prints:
# [[[0. 1. 0. 0.] # hello
# [1. 0. 0. 0.] # world
# [0. 1. 0. 0.] # hello
# [0. 0. 1. 0.] # everyone
# [0. 0. 0. 1.]]] # else
# One-hot encoded using embedding layer!
# Note that this one-hot encoding does not stay intact during training,
# but in practice I have found that the numbers stay close to 0 and 1 respectively.
# Also, if you _need_ a true one-hot encoding, an embedding layer may not always be appropriate.
# Good discussion: https://github.com/keras-team/keras/issues/4838#issuecomment-269138502
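# A hedged sketch, not part of the original gist: the same one-hot encoding can be produced
# directly with tf.keras.utils.to_categorical, which gives a quick sanity check of the
# identity-initialized embedding above (before any training touches the weights).
print(tf.keras.utils.to_categorical(a, num_classes=4))
# Should match model.predict(a) exactly, since the embedding weights are still the identity matrix.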
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Sequence-to-sequence prediction using an encoder-decoder network
# Inspired by https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
import tensorflow as tf
import numpy as np
# Overfit on the same sentence for illustration purposes.
# Interestingly, it seems to require more than one training sample for this to work.
data = [
    'All work and no play makes Jack a dull boy',
    'All work and no play makes Jack a dull boy',
    'All work and no play makes Jack a dull boy',
    'All work and no play makes Jack a dull boy'
]
# Map texts to integer sequences.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(data)
sequences = np.array(tokenizer.texts_to_sequences(data))
# The target needs to be 3D, (samples, timesteps, 1), to match the per-timestep output of the last Dense layer with sparse_categorical_crossentropy.
sequence_out = sequences.reshape((sequences.shape[0], sequences.shape[1], 1))
# The vocabulary size needs to be the number of unique words + 1, since index 0 is not used by the tokenizer.
num_tokens = len(tokenizer.word_index) + 1
# Encoder with embedding layer. The output dimension of 8 is arbitrary
inp = tf.keras.layers.Input(shape=(None,))
x = tf.keras.layers.Embedding(input_dim=num_tokens, output_dim=8)(inp)
_, state = tf.keras.layers.GRU(32, return_state=True)(x)
# Decoder
# The initial_state seeds the decoder GRU with the encoder's final state, and return_sequences=True makes it emit an output at every timestep (one prediction per target word).
inp2 = tf.keras.layers.Input(shape=(None,))
x = tf.keras.layers.Embedding(input_dim=num_tokens, output_dim=8)(inp2)
x = tf.keras.layers.GRU(32, return_sequences=True)(x, initial_state=state)
outp = tf.keras.layers.Dense(num_tokens, activation='softmax')(x)
# Creating and compiling is straightforward
model = tf.keras.models.Model(inputs=[inp, inp2], outputs=[outp])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy']
)
model.summary()
# Usually achieves 100% accuracy after about 20 epochs.
model.fit([sequences, sequences], sequence_out, batch_size=1, epochs=20, callbacks=[
    tf.keras.callbacks.TerminateOnNaN(),
    tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2),
])
rev = {v: k for k,v in tokenizer.word_index.items()}
print(rev)
def print_prediction(sequences):
    p = model.predict([sequences, sequences])
    # p has shape (samples, timesteps, num_tokens). argmax over axis=1 finds, for each vocabulary
    # index, the timestep where that word is most probable; the +1 maps the timestep back to a
    # word id, which works here because word id happens to equal position + 1 in this sentence.
    for i, sentence in enumerate(np.argmax(p, axis=1) + 1):
        print(i, sentence)
        print(' '.join(rev.get(int(word), 'N/A') for word in sentence))
        print()
# This correctly prints the original sentences
print_prediction(sequences)
# This usually prints the original sentence, even though we use the same word (8) for the entire sequence
print_prediction(np.array([[8, 8, 8, 8, 8, 8, 8, 8, 8, 8]]))
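# A hedged sketch, not part of the original gist: since `inp` and `state` are Keras tensors from
# the graph above, the trained encoder can be wrapped in its own sub-model to inspect the state
# vector that is handed to the decoder. The name `encoder` is just an illustrative choice.
encoder = tf.keras.models.Model(inputs=inp, outputs=state)
print(encoder.predict(sequences).shape)  # (4, 32): one 32-dimensional state per input sentence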
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Word prediction using a recurrent neural network
import numpy as np
import tensorflow as tf
# Overfit on the same sentence for illustration purposes.
data = [
    'All work and no play makes Jack a dull boy',
]
# Map texts to integer sequences.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(data)
sequences = np.array(tokenizer.texts_to_sequences(data))
# Create input-output pairs: two input words, one output word
seq_length = 2
# The vocabulary size needs to be the number of unique words + 1, since index 0 is not used by the tokenizer.
num_words = len(tokenizer.word_index) + 1
X, y = [], []
for sequence in sequences:
    for i in range(seq_length, len(sequence)):
        X.append(sequence[i-seq_length:i])
        y.append(sequence[i])
# Make categorical (one-hot) outputs to please the network.
# It's also possible to _not_ do this and use sparse_categorical_crossentropy as the loss instead,
# which could potentially use less memory.
y = tf.keras.utils.to_categorical(y, num_classes=num_words)
# Embedding -> gru -> dense -> dense
inp = tf.keras.layers.Input(shape=(seq_length,))
x = tf.keras.layers.Embedding(input_dim=num_words, output_dim=5, input_length=seq_length)(inp)
x = tf.keras.layers.GRU(16)(x)
x = tf.keras.layers.Dense(16)(x)
outp = tf.keras.layers.Dense(num_words, activation='softmax')(x)
model = tf.keras.models.Model(inputs=[inp], outputs=[outp])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy']
)
model.summary()
# This fit has widely different results, sometimes reaching
# less than 50% accuracy after 50 epochs and other times reaching 100% accuracy.
model.fit(np.array(X), np.array(y), batch_size=1, epochs=50, callbacks=[
    tf.keras.callbacks.TerminateOnNaN(),
    tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3),
])
rev = {v: k for k,v in tokenizer.word_index.items()}
print(rev)
def print_prediction(sequences):
    p = model.predict(sequences)
    words = np.argmax(p, axis=1)
    for i, word in enumerate(words):
        # Use a differently named loop variable so it does not shadow the predicted `word` above.
        seq = ' '.join(rev.get(int(w), 'N/A') for w in sequences[i])
        print(i, 'Input:', '"{}"'.format(seq), 'Next word:', '"{}"'.format(rev.get(int(word), 'N/A')))
        print()
# Should hopefully print the correct next word for each of these sentences
# I.e. "and", "makes", "dull", "boy"
print_prediction(np.array(tokenizer.texts_to_sequences(['All work', 'no play', 'jack a', 'a dull'])))
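# A hedged sketch, not part of the original gist: the same model can generate a longer sequence
# greedily by repeatedly feeding it its own prediction. The `seed` variable is illustrative only.
seed = tokenizer.texts_to_sequences(['All work'])[0]
for _ in range(8):
    window = np.array([seed[-seq_length:]])
    seed.append(int(np.argmax(model.predict(window), axis=1)[0]))
print(' '.join(rev.get(w, 'N/A') for w in seed))
# If training converged, this should reconstruct something close to the original sentence.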
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Use Keras functional model to train the XOR function
import keras.utils
from keras.models import Model
from keras.layers import Input, Dense
import numpy as np
# Define the functional layers
inputs = Input(shape=(2,))
dense = Dense(32, activation='relu')(inputs)
predictions = Dense(2, activation='softmax')(dense)
# Compile the model
model = Model(inputs=inputs, outputs=predictions)
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Generate XOR training data
x_train = np.random.randint(2, size=(1000, 2))
y_train_raw = np.logical_xor(x_train[:,0], x_train[:,1]).reshape(1000, 1)
y_train = keras.utils.to_categorical(y_train_raw, num_classes=2)
x_test = np.random.randint(2, size=(10, 2))
y_test_raw = np.logical_xor(x_test[:,0], x_test[:,1]).reshape(10, 1)
y_test = keras.utils.to_categorical(y_test_raw, num_classes=2)
# Train the model, iterating on the data in batches of 32 samples
model.fit(x_train, y_train, epochs=10, batch_size=32)
# Test the model
print('Evaluation score: ', model.evaluate(x_test, y_test))
print('Input data: ', x_test)
print('Predictions: ', model.predict(x_test))
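# A hedged sketch, not part of the original gist: the softmax output can be collapsed to hard
# 0/1 class predictions with argmax, which is easier to compare against the raw XOR labels.
# `predicted_classes` is just an illustrative name.
predicted_classes = np.argmax(model.predict(x_test), axis=1)
print('Predicted classes: ', predicted_classes)
print('True XOR values:   ', y_test_raw.ravel().astype(int))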
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/
# Use Keras sequential model to train the XOR function
import keras.utils
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=2))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Generate XOR training data
x_train = np.random.randint(2, size=(1000, 2))
y_train_raw = np.logical_xor(x_train[:,0], x_train[:,1]).reshape(1000, 1)
y_train = keras.utils.to_categorical(y_train_raw, num_classes=2)
x_test = np.random.randint(2, size=(10, 2))
y_test_raw = np.logical_xor(x_test[:,0], x_test[:,1]).reshape(10, 1)
y_test = keras.utils.to_categorical(y_test_raw, num_classes=2)
# Train the model, iterating on the data in batches of 32 samples
model.fit(x_train, y_train, epochs=10, batch_size=32)
# Test the model
print('Evaluation score: ', model.evaluate(x_test, y_test))
print('Input data: ', x_test)
print('Predictions: ', model.predict(x_test))
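# A hedged sketch, not part of the original gist: since XOR is a binary problem, the same network
# can also be phrased with a single sigmoid output and binary_crossentropy, skipping the
# to_categorical step. The name `model2` is illustrative only.
model2 = Sequential()
model2.add(Dense(32, activation='relu', input_dim=2))
model2.add(Dense(1, activation='sigmoid'))
model2.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model2.fit(x_train, y_train_raw.astype('float32'), epochs=10, batch_size=32)
print('Sigmoid variant predictions: ', model2.predict(x_test).round().ravel())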