Keras Sequence to Sequence Simple example
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM, Masking
from keras.optimizers import RMSprop
import numpy as np

"""
This gist shows how to do a simple word-level (instead of char-level) seq-to-seq with Keras.
It is roughly based on this example: https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py
It also shows how masking works with uneven-length sequences, how to build the response targets, etc.
"""
# Questions used to generate the input sequences
questions = [
    "Hello, what is your name",
    "What do you like to do",
    "What are your favorite foods",
    "Do you like coding",
    "Do you enjoy movies"
]
# Responses from the bot (each ends with an explicit END token)
responses = [
    "My name is billy joel END",
    "I like to play soccer END",
    "My favorite food is chicken END",
    "It is not really for me END",
    "Yes and no it Depends END"
]
# Get the individual words
x_words = [j for i in questions for j in i.split()]
y_words = [j for i in responses for j in i.split()]
# Combine the words
all_words = x_words
all_words.extend(y_words)
# Remove duplicates
all_words = set(all_words)
# Convert to a list for indexing
all_words_l = list(all_words)
# Index with a dictionary for fast word -> index lookup
all_words = dict(zip(all_words_l, range(len(all_words_l))))
# Build the training pairs: each response word becomes the target for the
# question plus all previously generated response words
new_questions = []
new_answers = []
for i, sentence in enumerate(questions):
    # Take the question's and the response's words
    quest = sentence.split()
    resp = responses[i].split()
    # The bare question is the first training context for this pair
    new_questions.append(quest)
    for answ in resp:
        # The current response word is the target for the previous context
        new_answers.append(answ)
        if answ != "END":
            temp_q = quest[:]  # copy
            temp_q.append(answ)
            new_questions.append(temp_q)
            quest = temp_q
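# Illustration (not part of the original gist): for the first pair, the loop
# above expands into 6 (context, target) examples, e.g.
#   "Hello, what is your name"                       -> "My"
#   "Hello, what is your name My"                    -> "name"
#   "Hello, what is your name My name is billy joel" -> "END"
# A quick sanity check of that expansion:
assert new_answers[:6] == "My name is billy joel END".split()
assert new_questions[0] == "Hello, what is your name".split()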
# Calculate the maximum sequence length, used for padding/masking
temp = np.array([len(x.split()) for x in questions])
resp_temp = np.array([len(x.split()) for x in responses])
max_len = temp.max() + resp_temp.max()
# One-hot encode the words (binary vectors, not word2vec);
# timesteps beyond a sequence's length stay all-zero and get masked later
X = np.zeros((len(new_questions), max_len, len(all_words_l)))
y = np.zeros((len(new_answers), len(all_words_l)))
for i, quest in enumerate(new_questions):
    for t, word in enumerate(quest):
        X[i, t, all_words[word]] = 1
    y[i, all_words[new_answers[i]]] = 1
# Create the model: a masking layer so padded timesteps are ignored,
# then an LSTM and a softmax over the vocabulary
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(max_len, len(all_words_l))))
model.add(LSTM(128))
model.add(Dense(len(all_words_l)))
model.add(Activation('softmax'))
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
# The first 6 rows of X are the contexts built from the first question;
# they are used below to generate its response word by word
to_test = X[:6, :, :]
# Fit the data
model.fit(X, y, epochs=35, batch_size=20)
# Predict the next word for each of the first question's contexts
predicted = model.predict(to_test)
gen = [all_words_l[predicted[i].argmax()] for i in range(len(predicted))]
print(gen)
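# A minimal generation sketch (not in the original gist): instead of relying on
# the pre-built contexts in X, start from a raw question string and feed each
# predicted word back into the context until END is produced or the sequence
# fills max_len timesteps. It assumes the question only contains words seen
# during training (otherwise the all_words lookup would fail).
def generate_response(question):
    context = question.split()
    generated = []
    while len(context) < max_len:
        # One-hot encode the current context, padded with zeros like X
        x_in = np.zeros((1, max_len, len(all_words_l)))
        for t, word in enumerate(context):
            x_in[0, t, all_words[word]] = 1
        # Greedily take the most probable next word
        next_word = all_words_l[model.predict(x_in)[0].argmax()]
        if next_word == "END":
            break
        generated.append(next_word)
        context.append(next_word)
    return " ".join(generated)

print(generate_response("Hello, what is your name"))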