
@p-baleine
Last active May 16, 2023 12:28
TensorFlow's PTB LSTM model for Keras
# config.py
class SmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 20
    hidden_size = 200
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000


class MediumConfig(object):
    """Medium config."""
    init_scale = 0.05
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 35
    hidden_size = 650
    max_epoch = 6
    max_max_epoch = 39
    keep_prob = 0.5
    lr_decay = 0.8
    batch_size = 20
    vocab_size = 10000


class LargeConfig(object):
    """Large config."""
    init_scale = 0.04
    learning_rate = 1.0
    max_grad_norm = 10
    num_layers = 2
    num_steps = 35
    hidden_size = 1500
    max_epoch = 14
    max_max_epoch = 55
    keep_prob = 0.35
    lr_decay = 1 / 1.15
    batch_size = 20
    vocab_size = 10000


class TestConfig(object):
    """Tiny config, for testing."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 1
    num_layers = 1
    num_steps = 2
    hidden_size = 2
    max_epoch = 1
    max_max_epoch = 1
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000


def get_config(model):
    if model == 'small':
        return SmallConfig()
    elif model == 'medium':
        return MediumConfig()
    elif model == 'large':
        return LargeConfig()
    elif model == 'test':
        return TestConfig()
    else:
        raise ValueError('Invalid model: {}'.format(model))
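For orientation (this snippet is illustrative, not part of the gist): the training and sampling scripts below select one of these presets by name via get_config.

from config import get_config

config = get_config('medium')   # raises ValueError for an unknown name
print(config.hidden_size, config.num_steps, config.batch_size)
# 650 35 20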
Notebook (Python 3.5 kernel): load a trained checkpoint and sample from it interactively.

In [1]:
from keras.models import model_from_json
from keras import initializations
from optimizer import PtbSGD
from config import get_config
from ptb_word_lm import uniform
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import click
import reader
import pickle
import sys

def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))

size = 'medium'
architecture = 'checkpoints/prb_word_lm_medium_architecture.json'
weights = 'checkpoints/prb_word_lm_medium_27_132.82246700553142.h5'
length = 20

config = get_config(size)
initializations.init = uniform(config.init_scale)

with open(architecture) as f:
    model = model_from_json(f.read())

model.load_weights(weights)

optimizer = PtbSGD(lr=config.learning_rate,
                   decay=config.lr_decay,
                   clipnorm=config.max_grad_norm,
                   epoch_size=10,  # dummy
                   max_epoch=config.max_epoch)

model.compile(loss='categorical_crossentropy', optimizer=optimizer)

with open('vocab.bin', 'rb') as f:
    word_to_id = pickle.load(f)
id_to_word = {}
for c, i in word_to_id.items():
    id_to_word[i] = c

(stderr) Using Theano backend.

In [4]:
def predict(seed_text):
    sys.stdout.write(seed_text + ' ')
    sentence = [word_to_id[word] for word in seed_text.split(' ')]

    for i in range(length):
        preds = model.predict(pad_sequences([sentence] * config.batch_size,
                                            maxlen=config.num_steps))[0]
        next_index = sample(preds, 1.5)
        next_word = id_to_word[next_index]
        sentence = sentence[1:] + next_index

        sys.stdout.write((next_word if next_word != '<eos>' else '.') + ' ')
        sys.stdout.flush()

In [5]:
from ipywidgets import widgets
from IPython.display import display

text = widgets.Text()
display(text)

def handle_submit(sender):
    predict(text.value)

text.on_submit(handle_submit)

(stdout) cigarettes with schwarz somewhat in leased a caller towers blacks they speaker reach 's <unk> . thomas that presents while 's .
# Makefile
train-theano: data/simple-examples checkpoints
	THEANO_FLAGS=device=gpu,floatX=float32,lib.cnmem=1 KERAS_BACKEND=theano \
	python ptb_word_lm.py

train-tensorflow: data/simple-examples checkpoints
	KERAS_BACKEND=tensorflow python ptb_word_lm.py

data/simple-examples: data/simple-examples.tgz
	tar zxvf data/simple-examples.tgz -C data/

data/simple-examples.tgz: data
	wget -P data http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

data:
	mkdir data

checkpoints:
	mkdir checkpoints

clean:
	rm -rf data
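Typical usage, as implied by the targets above: running make train-theano (or make train-tensorflow) first builds the data/simple-examples and checkpoints prerequisites, downloading and unpacking the PTB data with wget and tar, and then launches ptb_word_lm.py with the corresponding KERAS_BACKEND; make clean removes the downloaded data.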
# optimizer.py
from keras import backend as K
from keras.optimizers import Optimizer
import numpy as np


class PtbSGD(Optimizer):
    """SGD with the PTB learning-rate schedule: the learning rate stays at
    `lr` while the epoch index (iterations // epoch_size) is at most
    `max_epoch`, and is multiplied by `decay` for every epoch beyond that.
    """

    def __init__(self, lr=1.0, decay=.5, epoch_size=1000,
                 max_epoch=4, **kwargs):
        super(PtbSGD, self).__init__(**kwargs)
        self.__dict__.update(locals())
        self.iterations = K.variable(0.)
        self.base_lr = K.variable(lr)
        self.lr = K.variable(lr)
        self.decay = K.variable(decay)
        self.epoch_size = K.variable(epoch_size)
        self.max_epoch = K.variable(max_epoch)

    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        epoch = self.iterations // self.epoch_size
        decay = K.pow(self.decay, K.switch(epoch - self.max_epoch > 0.,
                                           epoch - self.max_epoch,
                                           K.variable(0.)))
        self.lr = self.base_lr * decay
        self.updates = [(self.iterations, self.iterations + 1.)]
        for p, g in zip(params, grads):
            self.updates.append((p, p - self.lr * g))
        return self.updates

    def get_config(self):
        config = {'base_lr': float(K.get_value(self.base_lr)),
                  'decay': float(K.get_value(self.decay)),
                  'epoch_size': float(K.get_value(self.epoch_size)),
                  'max_epoch': float(K.get_value(self.max_epoch))}
        base_config = super(PtbSGD, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def get_lr(self):
        return self.lr.eval()
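As a sanity check of the schedule (illustrative only; plain Python rather than Keras variables), the same decay rule evaluated with SmallConfig-style settings looks like this:

# decay rule from PtbSGD.get_updates, for lr=1.0, lr_decay=0.5, max_epoch=4
base_lr, decay, max_epoch = 1.0, 0.5, 4
for epoch in range(8):
    lr = base_lr * decay ** max(epoch - max_epoch, 0)
    print(epoch, lr)
# epochs 0-4 keep lr = 1.0; epoch 5 -> 0.5, epoch 6 -> 0.25, epoch 7 -> 0.125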
# ptb_word_lm.py
from keras import backend as K
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.utils.np_utils import to_categorical
from keras.optimizers import SGD
from optimizer import PtbSGD
from config import get_config
import click
import numpy as np
import reader
import time
import pickle


def get_model(epoch_size, config):
    """Return the PTB model."""
    batch_size = config.batch_size
    num_steps = config.num_steps
    num_layers = config.num_layers
    size = config.hidden_size
    vocab_size = config.vocab_size
    learning_rate = config.learning_rate
    lr_decay = config.lr_decay
    keep_prob = config.keep_prob
    max_grad_norm = config.max_grad_norm
    max_epoch = config.max_epoch
    max_max_epoch = config.max_max_epoch

    lstm_parameters = {
        "output_dim": size,
        "init": uniform(config.init_scale),
        "inner_init": uniform(config.init_scale),
        "forget_bias_init": "zero",
        "stateful": True,
        "consume_less": "gpu"
    }

    model = Sequential()
    model.add(Embedding(vocab_size, size,
                        batch_input_shape=(batch_size, num_steps)))
    if keep_prob < 1:
        model.add(Dropout(1 - keep_prob))
    for i in range(num_layers - 1):
        model.add(LSTM(return_sequences=True, **lstm_parameters))
        if keep_prob < 1:
            model.add(Dropout(1 - keep_prob))
    model.add(LSTM(return_sequences=False, **lstm_parameters))
    if keep_prob < 1:
        model.add(Dropout(1 - keep_prob))
    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))

    optimizer = PtbSGD(lr=learning_rate, decay=lr_decay,
                       clipnorm=max_grad_norm,
                       epoch_size=epoch_size,
                       max_epoch=max_epoch)
    # With lr=1 the perplexity diverges almost immediately when the network is large?
    # optimizer = SGD(lr=learning_rate, clipnorm=max_grad_norm)

    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model
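# For reference (a sketch inferred from the layer arguments above, not from a
# recorded run): with SmallConfig (batch_size=20, num_steps=20,
# hidden_size=200, vocab_size=10000) the tensor shapes are roughly
#   input x                       -> (20, 20)        integer word ids
#   Embedding                     -> (20, 20, 200)
#   LSTM(return_sequences=True)   -> (20, 20, 200)
#   LSTM(return_sequences=False)  -> (20, 200)        last timestep only
#   Dense(10000) + softmax        -> (20, 10000)      next-word distribution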
def run_epoch(data, model, batch_size, num_steps, vocab_size):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    start_time = time.time()
    losses = 0.0
    iters = 0

    model.reset_states()

    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.train_on_batch(x, y)
        losses += loss
        iters += num_steps

        # print(model.optimizer.get_lr())
        print(np.exp(losses / iters))

        if step % (epoch_size // 10) == 10:
            print('{:.3f} perplexity: {:.3f} speed: {:.0f} wps'.format(
                step * 1.0 / epoch_size, np.exp(losses / iters),
                iters * batch_size / (time.time() - start_time)
            ))

    return np.exp(losses / iters)


def run_test_epoch(data, model, batch_size, num_steps, vocab_size):
    """Tests the model on the given data."""
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    losses = 0.0
    iters = 0

    model.reset_states()

    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.test_on_batch(x, y)
        losses += loss
        iters += num_steps

    return np.exp(losses / iters)


def uniform(scale=0.05):
    def init(shape, name=None):
        return K.variable(np.random.uniform(low=-scale, high=scale, size=shape),
                          name=name)
    return init


@click.command()
@click.option('--size', default='small')
@click.option('--data_path', default='data/simple-examples/data')
def main(size, data_path):
    raw_data = reader.ptb_raw_data(data_path)
    word_to_id, id_to_word, train_data, valid_data, test_data = raw_data

    config = get_config(size)
    batch_size = config.batch_size
    num_steps = config.num_steps
    vocab_size = config.vocab_size
    epoch_size = ((len(train_data) // config.batch_size) - 1) // config.num_steps

    model = get_model(epoch_size, config)

    with open('vocab.bin', 'wb') as f:
        pickle.dump(word_to_id, f)

    print('Training with {} size'.format(size))

    with open('checkpoints/prb_word_lm_{}_architecture.json'.format(size), 'w') as f:
        f.write(model.to_json())

    # train
    for i in range(config.max_max_epoch):
        # print("Epoch: {} Learning rate: {}".format(i + 1, model.optimizer.get_lr()))
        train_perplexity = run_epoch(train_data, model, batch_size, num_steps, vocab_size)
        print('Epoch: {} Train Perplexity: {:.3f}'.format(
            i + 1, train_perplexity))
        valid_perplexity = run_test_epoch(valid_data, model, batch_size, num_steps, vocab_size)
        print('Epoch: {} Valid Perplexity: {:.3f}'.format(
            i + 1, valid_perplexity))

        print('save weights ...')
        model.save_weights('checkpoints/prb_word_lm_{}_{}_{}.h5'.format(
            size, i, valid_perplexity))

    test_perplexity = run_test_epoch(test_data, model, batch_size, num_steps, vocab_size)
    print('Test Perplexity: {:.3f}'.format(test_perplexity))


if __name__ == '__main__':
    main()
# reader.py
from collections import Counter
import numpy as np
import os


def _read_words(filename):
    with open(filename) as f:
        return f.read().replace('\n', '<eos>').split()


def _build_vocab(filename):
    data = _read_words(filename)
    counter = Counter(data)
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    words, _ = list(zip(*count_pairs))
    word_to_id = dict(zip(words, range(len(words))))
    id_to_word = dict((i, v) for v, i in word_to_id.items())
    return word_to_id, id_to_word


def _file_to_word_ids(filename, word_to_id):
    data = _read_words(filename)
    return [word_to_id[word] for word in data]


def ptb_raw_data(data_path):
    train_path = os.path.join(data_path, 'ptb.train.txt')
    valid_path = os.path.join(data_path, 'ptb.valid.txt')
    test_path = os.path.join(data_path, 'ptb.test.txt')

    word_to_id, id_to_word = _build_vocab(train_path)
    train_data = _file_to_word_ids(train_path, word_to_id)
    valid_data = _file_to_word_ids(valid_path, word_to_id)
    test_data = _file_to_word_ids(test_path, word_to_id)
    return word_to_id, id_to_word, train_data, valid_data, test_data


def ptb_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

    epoch_size = (batch_len - 1) // num_steps

    for i in range(epoch_size):
        x = data[:, i*num_steps:(i+1)*num_steps]
        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
        yield (x, y)
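A quick illustration (not part of the gist) of what ptb_iterator yields: the data is split into batch_size contiguous rows, and the targets are the inputs shifted one position to the right.

from reader import ptb_iterator   # assuming the module above is saved as reader.py

toy = list(range(13))             # 13 toy token ids
for x, y in ptb_iterator(toy, batch_size=2, num_steps=3):
    print(x)
    # [[0 1 2]
    #  [6 7 8]]
    print(y)
    # [[1 2 3]
    #  [7 8 9]]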
# requirements.txt
click==6.6
h5py==2.6.0
Keras==1.0.4
numpy==1.11.0
PyYAML==3.11
scipy==0.17.1
six==1.10.0
Theano==0.8.2
# sampling script (command-line counterpart of the notebook above)
from keras.models import model_from_json
from keras import initializations
from optimizer import PtbSGD
from config import get_config
from ptb_word_lm import uniform
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import click
import reader
import pickle
import sys


def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))


@click.command()
@click.option('--size', default='small')
@click.option('--architecture')
@click.option('--weights')
@click.option('--seed_text', default='nonexecutive director of')
@click.option('--length', default=20)
def main(size, architecture, weights, seed_text, length):
    config = get_config(size)
    initializations.init = uniform(config.init_scale)
    # custom_objects = {'init': uniform(config.init_scale)}

    with open(architecture) as f:
        # model = model_from_json(f.read(), custom_objects)
        model = model_from_json(f.read())

    model.load_weights(weights)

    optimizer = PtbSGD(lr=config.learning_rate,
                       decay=config.lr_decay,
                       clipnorm=config.max_grad_norm,
                       epoch_size=10,  # dummy
                       max_epoch=config.max_epoch)

    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    with open('vocab.bin', 'rb') as f:
        word_to_id = pickle.load(f)
    id_to_word = {}
    for c, i in word_to_id.items():
        id_to_word[i] = c

    sys.stdout.write(seed_text + ' ')
    sentence = [word_to_id[word] for word in seed_text.split(' ')]

    for i in range(length):
        # TODO: there should be no need to prepare batch_size copies of the input
        preds = model.predict(pad_sequences([sentence] * config.batch_size,
                                            maxlen=config.num_steps))[0]
        next_index = sample(preds, 1.5)
        next_word = id_to_word[next_index]
        # append the sampled id to the running context (wrapped in a list so it
        # extends the sequence instead of being broadcast-added by NumPy)
        sentence = sentence[1:] + [next_index]

        sys.stdout.write((next_word if next_word != '<eos>' else '.') + ' ')
        sys.stdout.flush()
    print()


if __name__ == '__main__':
    main()
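To make the role of the temperature argument in sample() concrete, here is a small standalone sketch with illustrative values: lower temperatures sharpen the distribution, higher ones flatten it.

import numpy as np

probs = np.array([0.7, 0.2, 0.1])          # toy next-word distribution
for t in (0.5, 1.0, 1.5):
    scaled = np.exp(np.log(probs) / t)     # same rescaling as in sample()
    scaled /= scaled.sum()
    print(t, scaled.round(2))
# 0.5 -> [0.91 0.07 0.02]   sharper: almost always the top word
# 1.0 -> [0.7  0.2  0.1 ]   unchanged
# 1.5 -> [0.59 0.25 0.16]   flatter: more variety (the value used above)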
avolkov1 commented Nov 5, 2017

@p-baleine Hello. Thanks for posting this implementation in Keras. I can't replicate your results for the metrics; the perplexity numbers I'm getting are way different. Maybe this needs a custom Keras layer for tf.contrib.seq2seq.sequence_loss, as in the original TensorFlow implementation:

        # Use the contrib sequence loss and average over the batches
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            input_.targets,
            tf.ones([self.batch_size, self.num_steps], dtype=data_type()),
            average_across_timesteps=False,
            average_across_batch=True)

I ran it with the TensorFlow backend (I tried Theano and got the same thing). I'm getting these numbers:

config  epochs  train  valid  test
small   13      1.228  1.359  1.363

On the test run I got a data out-of-bounds error.
Are you still able to run this code with Keras 2.0.8 (or at least some recent version of Keras) and TF 1.2.1 or above? I had to change a few minor things to get it to work because of renamed parameters, and I changed this:

    for i in range(epoch_size):
        x = data[:, i*num_steps:(i+1)*num_steps]
#        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
        y = data[:, (i+1)*num_steps]  # CHANGED TO LAST STEP??? OTHERWISE ERROR
        yield (x, y)

Using CuDNNLSTM (in Keras 2.0.9) this runs significantly faster.

hoangcuong2011 commented Nov 7, 2019

Greetings,

Please correct me if I am wrong: line 51 in ptb_word_lm.py (model.add(LSTM(return_sequences=False, **lstm_parameters))) should be

model.add(LSTM(return_sequences=True, **lstm_parameters))

Roy-NJU commented May 16, 2023

When I ran ptb_word_lm.py, I got this error:

Traceback (most recent call last):
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 155, in <module>
    main()
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 128, in main
    model = get_model(epoch_size, config)
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 47, in get_model
    model.add(LSTM(return_sequences=True, **lstm_parameters))
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/lstm.py", line 562, in __init__
    super().__init__(
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/dropout_rnn_cell_mixin.py", line 43, in __init__
    super().__init__(*args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/base_rnn.py", line 271, in __init__
    super().__init__(**kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 3820, in __init__
    super().__init__(**kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 340, in __init__
    generic_utils.validate_kwargs(kwargs, allowed_kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/utils/generic_utils.py", line 514, in validate_kwargs
    raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'consume_less')
