@p-baleine
Last active May 16, 2023 12:28
Tensorflow's PTB LSTM model for keras
# config.py (imported as `config`): hyperparameter presets from the TensorFlow PTB tutorial.
class SmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 20
    hidden_size = 200
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000


class MediumConfig(object):
    """Medium config."""
    init_scale = 0.05
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 35
    hidden_size = 650
    max_epoch = 6
    max_max_epoch = 39
    keep_prob = 0.5
    lr_decay = 0.8
    batch_size = 20
    vocab_size = 10000


class LargeConfig(object):
    """Large config."""
    init_scale = 0.04
    learning_rate = 1.0
    max_grad_norm = 10
    num_layers = 2
    num_steps = 35
    hidden_size = 1500
    max_epoch = 14
    max_max_epoch = 55
    keep_prob = 0.35
    lr_decay = 1 / 1.15
    batch_size = 20
    vocab_size = 10000


class TestConfig(object):
    """Tiny config, for testing."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 1
    num_layers = 1
    num_steps = 2
    hidden_size = 2
    max_epoch = 1
    max_max_epoch = 1
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000


def get_config(model):
    if model == 'small':
        return SmallConfig()
    elif model == 'medium':
        return MediumConfig()
    elif model == 'large':
        return LargeConfig()
    elif model == 'test':
        return TestConfig()
    else:
        raise ValueError('Invalid model: {}'.format(model))
# Makefile: fetches the PTB data (simple-examples) and runs training with either backend.
train-theano: data/simple-examples checkpoints
	THEANO_FLAGS=device=gpu,floatX=float32,lib.cnmem=1 KERAS_BACKEND=theano \
	python ptb_word_lm.py

train-tensorflow: data/simple-examples checkpoints
	KERAS_BACKEND=tensorflow python ptb_word_lm.py

data/simple-examples: data/simple-examples.tgz
	tar zxvf data/simple-examples.tgz -C data/

data/simple-examples.tgz: data
	wget -P data http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

data:
	mkdir data

checkpoints:
	mkdir checkpoints

clean:
	rm -rf data
# optimizer.py (imported as `optimizer`): plain SGD whose learning rate follows the
# PTB tutorial schedule, lr = base_lr * decay ** max(epoch - max_epoch, 0).
from keras import backend as K
from keras.optimizers import Optimizer
import numpy as np


class PtbSGD(Optimizer):
    def __init__(self, lr=1.0, decay=.5, epoch_size=1000,
                 max_epoch=4, **kwargs):
        super(PtbSGD, self).__init__(**kwargs)
        self.__dict__.update(locals())
        self.iterations = K.variable(0.)
        self.base_lr = K.variable(lr)
        self.lr = K.variable(lr)
        self.decay = K.variable(decay)
        self.epoch_size = K.variable(epoch_size)
        self.max_epoch = K.variable(max_epoch)

    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        epoch = self.iterations // self.epoch_size
        decay = K.pow(self.decay, K.switch(epoch - self.max_epoch > 0.,
                                           epoch - self.max_epoch,
                                           K.variable(0.)))
        self.lr = self.base_lr * decay
        self.updates = [(self.iterations, self.iterations + 1.)]
        for p, g in zip(params, grads):
            self.updates.append((p, p - self.lr * g))
        return self.updates

    def get_config(self):
        config = {'base_lr': float(K.get_value(self.base_lr)),
                  'decay': float(K.get_value(self.decay)),
                  'epoch_size': float(K.get_value(self.epoch_size)),
                  'max_epoch': float(K.get_value(self.max_epoch))}
        base_config = super(PtbSGD, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def get_lr(self):
        return self.lr.eval()
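For reference, a minimal sketch (not part of the gist) of the schedule PtbSGD implements, written per epoch rather than per training iteration: the rate stays at base_lr until max_epoch epochs have passed, then is multiplied by decay once per additional epoch.

# Illustrative only: the decay schedule PtbSGD applies, per epoch.
def ptb_lr(epoch, base_lr=1.0, decay=0.5, max_epoch=4):
    exponent = max(epoch - max_epoch, 0)   # 0 until max_epoch epochs have passed
    return base_lr * decay ** exponent

for e in range(8):
    print(e, ptb_lr(e))   # 1.0 for epochs 0-4, then 0.5, 0.25, 0.125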
# ptb_word_lm.py: builds the PTB LSTM language model in Keras and trains it on the Penn Treebank data.
from keras import backend as K
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.utils.np_utils import to_categorical
from keras.optimizers import SGD
from optimizer import PtbSGD
from config import get_config
import click
import numpy as np
import reader
import time
import pickle


def get_model(epoch_size, config):
    """Return the PTB model."""
    batch_size = config.batch_size
    num_steps = config.num_steps
    num_layers = config.num_layers
    size = config.hidden_size
    vocab_size = config.vocab_size
    learning_rate = config.learning_rate
    lr_decay = config.lr_decay
    keep_prob = config.keep_prob
    max_grad_norm = config.max_grad_norm
    max_epoch = config.max_epoch
    max_max_epoch = config.max_max_epoch

    lstm_parameters = {
        "output_dim": size,
        "init": uniform(config.init_scale),
        "inner_init": uniform(config.init_scale),
        "forget_bias_init": "zero",
        "stateful": True,
        "consume_less": "gpu"
    }

    model = Sequential()
    model.add(Embedding(vocab_size, size,
                        batch_input_shape=(batch_size, num_steps)))
    if keep_prob < 1:
        model.add(Dropout(1 - keep_prob))
    for i in range(num_layers - 1):
        model.add(LSTM(return_sequences=True, **lstm_parameters))
        if keep_prob < 1:
            model.add(Dropout(1 - keep_prob))
    model.add(LSTM(return_sequences=False, **lstm_parameters))
    if keep_prob < 1:
        model.add(Dropout(1 - keep_prob))
    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))

    optimizer = PtbSGD(lr=learning_rate, decay=lr_decay,
                       clipnorm=max_grad_norm,
                       epoch_size=epoch_size,
                       max_epoch=max_epoch)
    # With lr=1 the perplexity diverges almost immediately when the network is large, though?
    # optimizer = SGD(lr=learning_rate, clipnorm=max_grad_norm)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    return model


def run_epoch(data, model, batch_size, num_steps, vocab_size):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    start_time = time.time()
    losses = 0.0
    iters = 0
    model.reset_states()
    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.train_on_batch(x, y)
        losses += loss
        iters += num_steps
        # print(model.optimizer.get_lr())
        print(np.exp(losses / iters))
        if step % (epoch_size // 10) == 10:
            print('{:.3f} perplexity: {:.3f} speed: {:.0f} wps'.format(
                step * 1.0 / epoch_size, np.exp(losses / iters),
                iters * batch_size / (time.time() - start_time)
            ))
    return np.exp(losses / iters)


def run_test_epoch(data, model, batch_size, num_steps, vocab_size):
    """Tests the model on the given data."""
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    losses = 0.0
    iters = 0
    model.reset_states()
    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.test_on_batch(x, y)
        losses += loss
        iters += num_steps
    return np.exp(losses / iters)


def uniform(scale=0.05):
    def init(shape, name=None):
        return K.variable(np.random.uniform(low=-scale, high=scale, size=shape),
                          name=name)
    return init


@click.command()
@click.option('--size', default='small')
@click.option('--data_path', default='data/simple-examples/data')
def main(size, data_path):
    raw_data = reader.ptb_raw_data(data_path)
    word_to_id, id_to_word, train_data, valid_data, test_data = raw_data

    config = get_config(size)
    batch_size = config.batch_size
    num_steps = config.num_steps
    vocab_size = config.vocab_size
    epoch_size = ((len(train_data) // config.batch_size) - 1) // config.num_steps

    model = get_model(epoch_size, config)

    with open('vocab.bin', 'wb') as f:
        pickle.dump(word_to_id, f)

    print('Training with {} size'.format(size))

    with open('checkpoints/prb_word_lm_{}_architecture.json'.format(size), 'w') as f:
        f.write(model.to_json())

    # train
    for i in range(config.max_max_epoch):
        # print("Epoch: {} Learning rate: {}".format(i + 1, model.optimizer.get_lr()))
        train_perplexity = run_epoch(train_data, model, batch_size, num_steps, vocab_size)
        print('Epoch: {} Train Perplexity: {:.3f}'.format(
            i + 1, train_perplexity))
        valid_perplexity = run_test_epoch(valid_data, model, batch_size, num_steps, vocab_size)
        print('Epoch: {} Valid Perplexity: {:.3f}'.format(
            i + 1, valid_perplexity))
        print('save weights ...')
        model.save_weights('checkpoints/prb_word_lm_{}_{}_{}.h5'.format(
            size, i, valid_perplexity))

    test_perplexity = run_test_epoch(test_data, model, batch_size, num_steps, vocab_size)
    print('Test Perplexity: {:.3f}'.format(test_perplexity))


if __name__ == '__main__':
    main()
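As a reference for the numbers these loops print, a minimal sketch (not part of the gist) of how perplexity relates to the accumulated loss: perplexity is the exponential of the mean per-word cross-entropy, so a model that guesses uniformly over the 10,000-word vocabulary sits at exactly 10000, and values close to 1, like those reported in the comments below, usually mean the quantity being exponentiated is not a per-word cross-entropy.

# Illustrative only: perplexity as exp(mean per-word negative log-likelihood).
import numpy as np

def perplexity(per_word_nll):
    # per_word_nll: one negative log-likelihood (natural log) per predicted word
    return np.exp(np.mean(per_word_nll))

print(perplexity([np.log(10000)] * 5))   # 10000.0 -- uniform guess over a 10k vocabulary
print(perplexity([np.log(120)] * 5))     # 120.0   -- a typical PTB-scale value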
# reader.py (imported as `reader`): PTB text loading, vocabulary building, and batch iteration.
from collections import Counter
import numpy as np
import os


def _read_words(filename):
    with open(filename) as f:
        return f.read().replace('\n', '<eos>').split()


def _build_vocab(filename):
    data = _read_words(filename)
    counter = Counter(data)
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    words, _ = list(zip(*count_pairs))
    word_to_id = dict(zip(words, range(len(words))))
    id_to_word = dict((i, v) for v, i in word_to_id.items())
    return word_to_id, id_to_word


def _file_to_word_ids(filename, word_to_id):
    data = _read_words(filename)
    return [word_to_id[word] for word in data]


def ptb_raw_data(data_path):
    train_path = os.path.join(data_path, 'ptb.train.txt')
    valid_path = os.path.join(data_path, 'ptb.valid.txt')
    test_path = os.path.join(data_path, 'ptb.test.txt')

    word_to_id, id_to_word = _build_vocab(train_path)
    train_data = _file_to_word_ids(train_path, word_to_id)
    valid_data = _file_to_word_ids(valid_path, word_to_id)
    test_data = _file_to_word_ids(test_path, word_to_id)
    return word_to_id, id_to_word, train_data, valid_data, test_data


def ptb_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

    epoch_size = (batch_len - 1) // num_steps

    for i in range(epoch_size):
        x = data[:, i*num_steps:(i+1)*num_steps]
        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
        yield (x, y)
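A minimal sketch (not part of the gist) of what ptb_iterator yields: x and y are both (batch_size, num_steps) arrays of word ids, with y shifted one position to the right of x.

# Illustrative only: ptb_iterator on a toy sequence of 20 word ids.
from reader import ptb_iterator

toy = list(range(20))
for x, y in ptb_iterator(toy, batch_size=2, num_steps=3):
    print(x[0], y[0])   # [0 1 2] [1 2 3] -- targets are the inputs shifted by one step
    break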
# Pinned dependencies for this gist (Keras 1.x era).
click==6.6
h5py==2.6.0
Keras==1.0.4
numpy==1.11.0
PyYAML==3.11
scipy==0.17.1
six==1.10.0
Theano==0.8.2
# Sampling script: loads a saved architecture and weights and generates text from a seed.
from keras.models import model_from_json
from keras import initializations
from optimizer import PtbSGD
from config import get_config
from ptb_word_lm import uniform
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import click
import reader
import pickle
import sys


def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))


@click.command()
@click.option('--size', default='small')
@click.option('--architecture')
@click.option('--weights')
@click.option('--seed_text', default='nonexecutive director of')
@click.option('--length', default=20)
def main(size, architecture, weights, seed_text, length):
    config = get_config(size)
    initializations.init = uniform(config.init_scale)

    # custom_objects = {'init': uniform(config.init_scale)}
    with open(architecture) as f:
        # model = model_from_json(f.read(), custom_objects)
        model = model_from_json(f.read())
    model.load_weights(weights)

    optimizer = PtbSGD(lr=config.learning_rate,
                       decay=config.lr_decay,
                       clipnorm=config.max_grad_norm,
                       epoch_size=10,  # dummy
                       max_epoch=config.max_epoch)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    with open('vocab.bin', 'rb') as f:
        word_to_id = pickle.load(f)
    id_to_word = {}
    for c, i in word_to_id.items():
        id_to_word[i] = c

    sys.stdout.write(seed_text + ' ')
    sentence = [word_to_id[word] for word in seed_text.split(' ')]
    for i in range(length):
        # TODO: there should be no need to replicate the input batch_size times
        preds = model.predict(pad_sequences([sentence] * config.batch_size,
                                            maxlen=config.num_steps))[0]
        next_index = sample(preds, 1.5)
        next_word = id_to_word[next_index]
        sentence = sentence[1:] + [next_index]  # drop the oldest word and append the sampled id
        sys.stdout.write((next_word if next_word != '<eos>' else '.') + ' ')
        sys.stdout.flush()
    print()


if __name__ == '__main__':
    main()
avolkov1 commented Nov 5, 2017

@p-baleine Hello. Thanks for posting this implementation in Keras. I can't replicate your results for the metrics; the perplexity numbers I'm getting are way different. Maybe this needs a custom Keras loss for tf.contrib.seq2seq.sequence_loss, per the original TensorFlow implementation:

        # Use the contrib sequence loss and average over the batches
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            input_.targets,
            tf.ones([self.batch_size, self.num_steps], dtype=data_type()),
            average_across_timesteps=False,
            average_across_batch=True)

I ran it with the TensorFlow backend (I tried Theano and got the same thing). I'm getting these numbers:

config    epochs    train    valid    test
small     13        1.228    1.359    1.363

On the test set I got a data out-of-bounds error.
Are you still able to run this code with Keras 2.0.8 (or at least some recent version of Keras) and TF 1.2.1 or above? I had to change a few minor things to get it to work because parameters were renamed, and I also changed this:

    for i in range(epoch_size):
        x = data[:, i*num_steps:(i+1)*num_steps]
#        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
        y = data[:, (i+1)*num_steps]  # CHANGED TO LAST STEP??? OTHERWISE ERROR
        yield (x, y)

Using CuDNNLSTM (in Keras 2.0.9) this runs significantly faster.
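For reference, a minimal numpy sketch (illustrative, not from the gist or from TensorFlow) of what tf.contrib.seq2seq.sequence_loss computes with average_across_timesteps=False and average_across_batch=True: one cross-entropy value per timestep, averaged over the batch; summing those values and dividing by the accumulated num_steps before exponentiating is what gives the per-word perplexity the TensorFlow tutorial reports.

# Illustrative only: the semantics of sequence_loss(average_across_timesteps=False,
# average_across_batch=True), using softmax probabilities instead of logits.
import numpy as np

def sequence_loss(probs, targets):
    # probs: (batch, num_steps, vocab) softmax outputs; targets: (batch, num_steps) word ids
    batch, num_steps, _ = probs.shape
    nll = -np.log(probs[np.arange(batch)[:, None], np.arange(num_steps), targets])
    return nll.mean(axis=0)   # shape (num_steps,): per-timestep loss averaged across the batch

# perplexity = np.exp(sequence_loss(probs, targets).sum() / num_steps)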

hoangcuong2011 commented Nov 7, 2019

Greetings,

Please correct me if I am wrong: line 51 in ptb_word_lm.py (model.add(LSTM(return_sequences=False, **lstm_parameters))) should be

model.add(LSTM(return_sequences=True, **lstm_parameters))
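If that change is made, the output layer and the targets have to change as well: with return_sequences=True the model emits one prediction per timestep, so the softmax needs a TimeDistributed wrapper and y must keep its per-timestep shape. A hedged sketch of what the tail of get_model might look like under that suggestion (Keras 1.x-style, untested; not the gist author's code):

# Hypothetical variant of the end of get_model() with per-timestep predictions.
from keras.layers.wrappers import TimeDistributed

model.add(LSTM(return_sequences=True, **lstm_parameters))  # last LSTM keeps the time axis
if keep_prob < 1:
    model.add(Dropout(1 - keep_prob))
model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))
# y would then need one-hot targets of shape (batch_size, num_steps, vocab_size).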

Roy-NJU commented May 16, 2023

When I ran ptb_word_lm.py, I got this error:

Traceback (most recent call last):
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 155, in <module>
    main()
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 128, in main
    model = get_model(epoch_size, config)
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 47, in get_model
    model.add(LSTM(return_sequences=True, **lstm_parameters))
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/lstm.py", line 562, in __init__
    super().__init__(
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/dropout_rnn_cell_mixin.py", line 43, in __init__
    super().__init__(*args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/base_rnn.py", line 271, in __init__
    super().__init__(**kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 3820, in __init__
    super().__init__(**kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 340, in __init__
    generic_utils.validate_kwargs(kwargs, allowed_kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/utils/generic_utils.py", line 514, in validate_kwargs
    raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'consume_less')
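The error comes from running Keras 1.x-era layer arguments (output_dim, init, inner_init, forget_bias_init, consume_less) on a modern Keras, where those keywords were renamed or removed. A hedged sketch of an approximately equivalent parameter dict for Keras 2.x (untested against this gist; the forget-gate bias handling only approximates forget_bias_init="zero"):

# Hypothetical Keras 2.x translation of lstm_parameters from get_model().
from keras.initializers import RandomUniform

init = RandomUniform(minval=-config.init_scale, maxval=config.init_scale)
lstm_parameters = {
    "units": size,                   # was "output_dim"
    "kernel_initializer": init,      # was "init"
    "recurrent_initializer": init,   # was "inner_init"
    "unit_forget_bias": False,       # rough stand-in for forget_bias_init="zero"
    "stateful": True,                # unchanged
    # "consume_less" has no direct equivalent in Keras 2; drop it
}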
