# Implementation of a simple character-level RNN (using LSTM units), based on:
# https://github.com/karpathy/char-rnn
# Source: https://github.com/fchollet/keras/pull/2137
# Written for Python 2 and the Keras 1.x API (TimeDistributedDense, stateful LSTMs).
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout, TimeDistributedDense
from keras.layers.recurrent import LSTM
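# Expects a plain-text training corpus at ./input.txt (for example, the Tiny
# Shakespeare file that ships with karpathy/char-rnn, though any text works).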
text = open('input.txt', 'r').read()
char_to_idx = {ch: i for (i, ch) in enumerate(sorted(list(set(text))))}
idx_to_char = {i: ch for (ch, i) in char_to_idx.items()}
vocab_size = len(char_to_idx)
print 'Working on %d characters (%d unique)' % (len(text), vocab_size)
SEQ_LEN = 64
BATCH_SIZE = 16
BATCH_CHARS = len(text) // BATCH_SIZE  # length of each batch row's text stream
LSTM_SIZE = 128
LAYERS = 3
# For training, each subsequent example for a given batch index should be a
# consecutive portion of the text. To achieve this, each batch index operates
# over a disjoint section of the input text.
def read_batches(text):
    T = np.asarray([char_to_idx[c] for c in text], dtype=np.int32)
    X = np.zeros((BATCH_SIZE, SEQ_LEN, vocab_size))
    Y = np.zeros((BATCH_SIZE, SEQ_LEN, vocab_size))
    for i in range(0, BATCH_CHARS - SEQ_LEN - 1, SEQ_LEN):
        X[:] = 0
        Y[:] = 0
        for batch_idx in range(BATCH_SIZE):
            start = batch_idx * BATCH_CHARS + i
            for j in range(SEQ_LEN):
                X[batch_idx, j, T[start + j]] = 1
                Y[batch_idx, j, T[start + j + 1]] = 1
        yield X, Y
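# Optional sanity check (illustrative, not part of the original gist): for a
# fixed batch row, consecutive yields should be consecutive slices of the text,
# which is what makes training with stateful=True across batches meaningful.
if False:  # flip to True to run the check
    gen = read_batches(text)
    X0, _ = next(gen)
    X1, _ = next(gen)
    first = ''.join(idx_to_char[int(np.argmax(v))] for v in X0[0])
    second = ''.join(idx_to_char[int(np.argmax(v))] for v in X1[0])
    assert first + second == text[:2 * SEQ_LEN]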
def build_model(infer):
    # Inference uses a batch size and sequence length of 1 so that characters
    # can be fed one at a time while the stateful LSTMs carry the hidden state.
    if infer:
        batch_size = seq_len = 1
    else:
        batch_size = BATCH_SIZE
        seq_len = SEQ_LEN
    model = Sequential()
    model.add(LSTM(LSTM_SIZE,
                   return_sequences=True,
                   batch_input_shape=(batch_size, seq_len, vocab_size),
                   stateful=True))
    model.add(Dropout(0.2))
    for l in range(LAYERS - 1):
        model.add(LSTM(LSTM_SIZE, return_sequences=True, stateful=True))
        model.add(Dropout(0.2))
    model.add(TimeDistributedDense(vocab_size))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adagrad')
    return model
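# Note (not in the original gist): instead of round-tripping weights through a
# file, the sampling model could be synced in memory with
#     test_model.set_weights(training_model.get_weights())
# since both models are built with the same layers in the same order.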
print 'Building model.'
training_model = build_model(infer=False)
test_model = build_model(infer=True)
print '... done'
def sample(epoch, sample_chars=256, primer_text='And the '):
    test_model.reset_states()
    test_model.load_weights('/tmp/keras_char_rnn.%d.h5' % epoch)
    sampled = [char_to_idx[c] for c in primer_text]
    # Warm up the hidden state on the primer; the last primer character is fed
    # by the generation loop below, so stop one character short here.
    for c in primer_text[:-1]:
        batch = np.zeros((1, 1, vocab_size))
        batch[0, 0, char_to_idx[c]] = 1
        test_model.predict_on_batch(batch)
    for i in range(sample_chars):
        batch = np.zeros((1, 1, vocab_size))
        batch[0, 0, sampled[-1]] = 1
        softmax = test_model.predict_on_batch(batch)[0].ravel()
        sample = np.random.choice(range(vocab_size), p=softmax)
        sampled.append(sample)
    print ''.join([idx_to_char[c] for c in sampled])
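# Illustrative extension (not in the original gist): char-rnn style samplers
# usually expose a "temperature" that rescales the softmax before drawing.
# A minimal sketch, taking the same softmax vector as used above:
def sample_with_temperature(softmax, temperature=1.0):
    # temperature > 1 flattens the distribution (more surprising output);
    # temperature < 1 sharpens it towards the argmax (more conservative).
    logits = np.log(softmax + 1e-8) / temperature
    p = np.exp(logits)
    p /= p.sum()
    return np.random.choice(range(vocab_size), p=p)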
for epoch in range(100):
    # The batch streams restart from the beginning of the text each epoch, so
    # reset the stateful LSTM states at every epoch boundary.
    training_model.reset_states()
    for i, (x, y) in enumerate(read_batches(text)):
        loss = training_model.train_on_batch(x, y)
        print epoch, i, loss
        if i % 1000 == 0:
            training_model.save_weights('/tmp/keras_char_rnn.%d.h5' % epoch,
                                        overwrite=True)
            sample(epoch)