@dmmiller612
Last active June 6, 2017 19:01
Keras Sequence to Sequence Simple example
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM, Masking
from keras.optimizers import RMSprop
import numpy as np
"""
This gist shows you how to do a simple seq-to-seq with Keras using words instead of chars.
It is roughly based on this example: https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py
It also shows how masking works with uneven-length sequences, how to build the training targets from the responses, etc.
"""
#Used to generate input
questions = [
"Hello, what is your name",
"What do you like to do",
"What are your favorite foods",
"Do you like coding",
"Do you enjoy movies"
]
#responses from bot
responses = [
"My name is billy joel END",
"I like to play soccer END",
"My favorite food is chicken END",
"It is not really for me END",
"Yes and no it Depends END"
]
# get the individual words
x_words = [j for i in questions for j in i.split()]
y_words = [j for i in responses for j in i.split()]
# combine the words from questions and responses
all_words = x_words + y_words
#remove duplicates
all_words = set(all_words)
#convert to list for indexing
all_words_l = list(all_words)
#index with a dictionary for speed
all_words = zip(all_words_l, range(len(all_words_l)))
all_words = dict(all_words)
#unrolled training pairs (input word sequences and next-word targets)
new_questions = []
new_answers = []
for i, sentence in enumerate(questions):
    #take the question's and response's words
    quest = questions[i].split()
    resp = responses[i].split()
    #the bare question is the first training input for this pair
    new_questions.append(quest)
    for z, answ in enumerate(resp):
        #each response word is the target for the previous input sequence
        new_answers.append(answ)
        if answ != "END":
            temp_q = quest[:] #copy
            temp_q.append(answ)
            new_questions.append(temp_q)
            quest = temp_q
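# For illustration (not in the original gist), the first question/response pair
# unrolls into training examples like:
#   Hello, what is your name                        -> My
#   Hello, what is your name My                     -> name
#   Hello, what is your name My name                -> is
#   ...
#   Hello, what is your name My name is billy joel  -> END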
#calculate the max input length (longest question + longest response), used for padding/masking
temp = [len(x.split()) for x in questions]
temp = np.array(temp)
resp_temp = [len(x.split()) for x in responses]
resp_temp = np.array(resp_temp)
max_len = temp.max() + resp_temp.max()
#One-hot encode the words (binary indicator vectors, not word2vec embeddings)
X = np.zeros((len(new_questions), max_len, len(all_words_l)))
y = np.zeros((len(new_answers), len(all_words_l)))
for i, quest in enumerate(new_questions):
    for t, word in enumerate(quest):
        X[i, t, all_words[word]] = 1
    y[i, all_words[new_answers[i]]] = 1
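# X has shape (num_examples, max_len, vocab_size) and y has shape (num_examples, vocab_size).
# Timesteps past the end of a short sequence stay all-zero, which is exactly what the
# Masking layer below is configured to skip.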
#Create model
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(max_len, len(all_words_l))))
model.add(LSTM(128))
model.add(Dense(len(all_words_l)))
model.add(Activation('softmax'))
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
#take the training rows built from the first question/response pair (6 rows, one per response word)
to_test = X[:6, :, :]
#Fit the data
model.fit(X, y, epochs=35, batch_size=20)  # 'epochs' was called 'nb_epoch' in Keras 1.x
#predicted next words; read in order they reconstruct the first response
predicted = model.predict(to_test)
gen = [all_words_l[predicted[i].argmax()] for i in range(len(predicted))]
print(gen)
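# ---------------------------------------------------------------------------
# Not part of the original gist: a minimal sketch of generating a full reply
# word by word, feeding each predicted word back into the input until "END"
# is produced (or max_len is reached). It reuses the model and one-hot
# vocabulary defined above; the question must only contain words seen in
# training. The generate_reply helper below is hypothetical.
def generate_reply(question):
    words = question.split()
    reply = []
    for _ in range(max_len - len(words)):
        #one-hot encode the question plus the reply so far
        x = np.zeros((1, max_len, len(all_words_l)))
        for t, word in enumerate(words):
            x[0, t, all_words[word]] = 1
        #pick the most likely next word and feed it back in
        next_word = all_words_l[model.predict(x)[0].argmax()]
        if next_word == "END":
            break
        reply.append(next_word)
        words.append(next_word)
    return " ".join(reply)

print(generate_reply("Hello, what is your name"))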