https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/ptb_word_lm.py

| config | epochs | train ppl | valid ppl | test ppl |
|--------|--------|-----------|-----------|----------|
| small  | 13     | 71.43     | 158.90    | 148.77   |
| medium | 39     | 82.01     | 134.07    | 125.39   |
| large  |        |           |           |          |
```python
# config.py: hyperparameter presets mirroring the TensorFlow PTB reference model.
class SmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 20
    hidden_size = 200
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000


class MediumConfig(object):
    """Medium config."""
    init_scale = 0.05
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 35
    hidden_size = 650
    max_epoch = 6
    max_max_epoch = 39
    keep_prob = 0.5
    lr_decay = 0.8
    batch_size = 20
    vocab_size = 10000


class LargeConfig(object):
    """Large config."""
    init_scale = 0.04
    learning_rate = 1.0
    max_grad_norm = 10
    num_layers = 2
    num_steps = 35
    hidden_size = 1500
    max_epoch = 14
    max_max_epoch = 55
    keep_prob = 0.35
    lr_decay = 1 / 1.15
    batch_size = 20
    vocab_size = 10000


class TestConfig(object):
    """Tiny config, for testing."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 1
    num_layers = 1
    num_steps = 2
    hidden_size = 2
    max_epoch = 1
    max_max_epoch = 1
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000


def get_config(model):
    if model == 'small':
        return SmallConfig()
    elif model == 'medium':
        return MediumConfig()
    elif model == 'large':
        return LargeConfig()
    elif model == 'test':
        return TestConfig()
    else:
        raise ValueError('Invalid model: {}'.format(model))
```
```make
train-theano: data/simple-examples checkpoints
	THEANO_FLAGS=device=gpu,floatX=float32,lib.cnmem=1 KERAS_BACKEND=theano \
	python ptb_word_lm.py

train-tensorflow: data/simple-examples checkpoints
	KERAS_BACKEND=tensorflow python ptb_word_lm.py

data/simple-examples: data/simple-examples.tgz
	tar zxvf data/simple-examples.tgz -C data/

data/simple-examples.tgz: data
	wget -P data http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

data:
	mkdir data

checkpoints:
	mkdir checkpoints

clean:
	rm -rf data
```
```python
# optimizer.py: plain SGD with the PTB learning-rate schedule
# (constant learning rate through max_epoch, then decayed by `decay` per epoch).
from keras import backend as K
from keras.optimizers import Optimizer
import numpy as np


class PtbSGD(Optimizer):
    def __init__(self, lr=1.0, decay=.5, epoch_size=1000,
                 max_epoch=4, **kwargs):
        super(PtbSGD, self).__init__(**kwargs)
        self.__dict__.update(locals())
        self.iterations = K.variable(0.)
        self.base_lr = K.variable(lr)
        self.lr = K.variable(lr)
        self.decay = K.variable(decay)
        self.epoch_size = K.variable(epoch_size)
        self.max_epoch = K.variable(max_epoch)

    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        # Current 0-based epoch index, derived from the number of batches seen so far.
        epoch = self.iterations // self.epoch_size
        decay = K.pow(self.decay, K.switch(epoch - self.max_epoch > 0.,
                                           epoch - self.max_epoch,
                                           K.variable(0.)))
        self.lr = self.base_lr * decay
        self.updates = [(self.iterations, self.iterations + 1.)]
        for p, g in zip(params, grads):
            self.updates.append((p, p - self.lr * g))
        return self.updates

    def get_config(self):
        config = {'base_lr': float(K.get_value(self.base_lr)),
                  'decay': float(K.get_value(self.decay)),
                  'epoch_size': float(K.get_value(self.epoch_size)),
                  'max_epoch': float(K.get_value(self.max_epoch))}
        base_config = super(PtbSGD, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def get_lr(self):
        return self.lr.eval()
```
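For reference, here is a standalone sketch (not part of the repo) of the schedule `get_updates` implements: the learning rate stays at `base_lr` while the 0-based epoch index is at most `max_epoch`, then shrinks by a factor of `decay` for each further epoch.

```python
# Standalone illustration of PtbSGD's learning-rate schedule (not repo code).
def ptb_lr(base_lr, decay, max_epoch, epoch):
    # `epoch` is the 0-based epoch index, i.e. iterations // epoch_size.
    return base_lr * decay ** max(epoch - max_epoch, 0)

# Small config values (lr=1.0, decay=0.5, max_epoch=4):
print([ptb_lr(1.0, 0.5, 4, e) for e in range(8)])
# [1.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.25, 0.125]
```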
```python
# ptb_word_lm.py: build and train the PTB word-level language model.
from keras import backend as K
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.utils.np_utils import to_categorical
from keras.optimizers import SGD
from optimizer import PtbSGD
from config import get_config
import click
import numpy as np
import reader
import time
import pickle


def get_model(epoch_size, config):
    """Return the PTB model."""
    batch_size = config.batch_size
    num_steps = config.num_steps
    num_layers = config.num_layers
    size = config.hidden_size
    vocab_size = config.vocab_size
    learning_rate = config.learning_rate
    lr_decay = config.lr_decay
    keep_prob = config.keep_prob
    max_grad_norm = config.max_grad_norm
    max_epoch = config.max_epoch
    max_max_epoch = config.max_max_epoch

    lstm_parameters = {
        "output_dim": size,
        "init": uniform(config.init_scale),
        "inner_init": uniform(config.init_scale),
        "forget_bias_init": "zero",
        "stateful": True,
        "consume_less": "gpu"
    }

    model = Sequential()
    model.add(Embedding(vocab_size, size,
                        batch_input_shape=(batch_size, num_steps)))
    if keep_prob < 1:
        model.add(Dropout(1 - keep_prob))
    for i in range(num_layers - 1):
        model.add(LSTM(return_sequences=True, **lstm_parameters))
        if keep_prob < 1:
            model.add(Dropout(1 - keep_prob))
    model.add(LSTM(return_sequences=False, **lstm_parameters))
    if keep_prob < 1:
        model.add(Dropout(1 - keep_prob))
    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))

    optimizer = PtbSGD(lr=learning_rate, decay=lr_decay,
                       clipnorm=max_grad_norm,
                       epoch_size=epoch_size,
                       max_epoch=max_epoch)
    # With lr=1, perplexity diverges almost immediately when the network is large, though?
    # optimizer = SGD(lr=learning_rate, clipnorm=max_grad_norm)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    return model


def run_epoch(data, model, batch_size, num_steps, vocab_size):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    start_time = time.time()
    losses = 0.0
    iters = 0
    model.reset_states()
    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.train_on_batch(x, y)
        losses += loss
        iters += num_steps
        # print(model.optimizer.get_lr())
        print(np.exp(losses / iters))
        if step % (epoch_size // 10) == 10:
            print('{:.3f} perplexity: {:.3f} speed: {:.0f} wps'.format(
                step * 1.0 / epoch_size, np.exp(losses / iters),
                iters * batch_size / (time.time() - start_time)
            ))
    return np.exp(losses / iters)


def run_test_epoch(data, model, batch_size, num_steps, vocab_size):
    """Tests the model on the given data."""
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    losses = 0.0
    iters = 0
    model.reset_states()
    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.test_on_batch(x, y)
        losses += loss
        iters += num_steps
    return np.exp(losses / iters)


def uniform(scale=0.05):
    def init(shape, name=None):
        return K.variable(np.random.uniform(low=-scale, high=scale, size=shape),
                          name=name)
    return init


@click.command()
@click.option('--size', default='small')
@click.option('--data_path', default='data/simple-examples/data')
def main(size, data_path):
    raw_data = reader.ptb_raw_data(data_path)
    word_to_id, id_to_word, train_data, valid_data, test_data = raw_data
    config = get_config(size)
    batch_size = config.batch_size
    num_steps = config.num_steps
    vocab_size = config.vocab_size
    epoch_size = ((len(train_data) // config.batch_size) - 1) // config.num_steps

    model = get_model(epoch_size, config)

    with open('vocab.bin', 'wb') as f:
        pickle.dump(word_to_id, f)

    print('Training with {} size'.format(size))
    with open('checkpoints/prb_word_lm_{}_architecture.json'.format(size), 'w') as f:
        f.write(model.to_json())

    # train
    for i in range(config.max_max_epoch):
        # print("Epoch: {} Learning rate: {}".format(i + 1, model.optimizer.get_lr()))
        train_perplexity = run_epoch(train_data, model, batch_size, num_steps, vocab_size)
        print('Epoch: {} Train Perplexity: {:.3f}'.format(
            i + 1, train_perplexity))
        valid_perplexity = run_test_epoch(valid_data, model, batch_size, num_steps, vocab_size)
        print('Epoch: {} Valid Perplexity: {:.3f}'.format(
            i + 1, valid_perplexity))
        print('save weights ...')
        model.save_weights('checkpoints/prb_word_lm_{}_{}_{}.h5'.format(
            size, i, valid_perplexity))

    test_perplexity = run_test_epoch(test_data, model, batch_size, num_steps, vocab_size)
    print('Test Perplexity: {:.3f}'.format(test_perplexity))


if __name__ == '__main__':
    main()
```
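For reference, the training script can also be run directly; the `--size` and `--data_path` options come from the click decorators above, e.g. `KERAS_BACKEND=tensorflow python ptb_word_lm.py --size medium --data_path data/simple-examples/data`.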
```python
# reader.py: PTB text-to-id utilities and the batched iterator.
from collections import Counter
import numpy as np
import os


def _read_words(filename):
    with open(filename) as f:
        return f.read().replace('\n', '<eos>').split()


def _build_vocab(filename):
    data = _read_words(filename)
    counter = Counter(data)
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    words, _ = list(zip(*count_pairs))
    word_to_id = dict(zip(words, range(len(words))))
    id_to_word = dict((i, v) for v, i in word_to_id.items())
    return word_to_id, id_to_word


def _file_to_word_ids(filename, word_to_id):
    data = _read_words(filename)
    return [word_to_id[word] for word in data]


def ptb_raw_data(data_path):
    train_path = os.path.join(data_path, 'ptb.train.txt')
    valid_path = os.path.join(data_path, 'ptb.valid.txt')
    test_path = os.path.join(data_path, 'ptb.test.txt')
    word_to_id, id_to_word = _build_vocab(train_path)
    train_data = _file_to_word_ids(train_path, word_to_id)
    valid_data = _file_to_word_ids(valid_path, word_to_id)
    test_data = _file_to_word_ids(test_path, word_to_id)
    return word_to_id, id_to_word, train_data, valid_data, test_data


def ptb_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)
    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        # y is x shifted by one position: y[t] is the next word after x[t].
        x = data[:, i*num_steps:(i+1)*num_steps]
        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
        yield (x, y)
```
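As a quick illustration (not part of the repo), `ptb_iterator` splits the id sequence into `batch_size` parallel streams and yields input/target windows shifted by one position:

```python
# Toy example of what ptb_iterator yields (illustration only).
from reader import ptb_iterator

data = list(range(10))  # pretend these are word ids
for x, y in ptb_iterator(data, batch_size=2, num_steps=2):
    print(x.tolist(), y.tolist())
# [[0, 1], [5, 6]] [[1, 2], [6, 7]]
# [[2, 3], [7, 8]] [[3, 4], [8, 9]]
```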
```
click==6.6
h5py==2.6.0
Keras==1.0.4
numpy==1.11.0
PyYAML==3.11
scipy==0.17.1
six==1.10.0
Theano==0.8.2
```
```python
# Text-generation script: load a saved architecture and weights, then sample
# words from the trained model starting from a seed text.
from keras.models import model_from_json
from keras import initializations
from optimizer import PtbSGD
from config import get_config
from ptb_word_lm import uniform
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import click
import reader
import pickle
import sys


def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))


@click.command()
@click.option('--size', default='small')
@click.option('--architecture')
@click.option('--weights')
@click.option('--seed_text', default='nonexecutive director of')
@click.option('--length', default=20)
def main(size, architecture, weights, seed_text, length):
    config = get_config(size)
    # Expose the custom initializer so model_from_json can resolve it.
    initializations.init = uniform(config.init_scale)
    # custom_objects = {'init': uniform(config.init_scale)}
    with open(architecture) as f:
        # model = model_from_json(f.read(), custom_objects)
        model = model_from_json(f.read())
    model.load_weights(weights)
    optimizer = PtbSGD(lr=config.learning_rate,
                       decay=config.lr_decay,
                       clipnorm=config.max_grad_norm,
                       epoch_size=10,  # dummy
                       max_epoch=config.max_epoch)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    with open('vocab.bin', 'rb') as f:
        word_to_id = pickle.load(f)
    id_to_word = {}
    for c, i in word_to_id.items():
        id_to_word[i] = c

    sys.stdout.write(seed_text + ' ')
    sentence = [word_to_id[word] for word in seed_text.split(' ')]
    for i in range(length):
        # TODO: there should be no need to prepare batch_size copies here
        preds = model.predict(pad_sequences([sentence] * config.batch_size,
                                            maxlen=config.num_steps))[0]
        next_index = sample(preds, 1.5)
        next_word = id_to_word[next_index]
        # Slide the context window: drop the oldest word, append the sampled id
        # (wrapped in a list so list concatenation works).
        sentence = sentence[1:] + [next_index]
        sys.stdout.write((next_word if next_word != '<eos>' else '.') + ' ')
        sys.stdout.flush()
    print()


if __name__ == '__main__':
    main()
```
Greetings,
please correct me if I am wrong: line 51 in ptb_word_lm.py, `model.add(LSTM(return_sequences=False, **lstm_parameters))`, should be
`model.add(LSTM(return_sequences=True, **lstm_parameters))`.
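If the last LSTM does return sequences, the head of the model also has to change so that a word is predicted at every timestep, and the targets become one-hot per timestep. A rough Keras 1-style sketch of that variant (my illustration, not code from the repo; the shape values stand in for the small config):

```python
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, TimeDistributed

# Placeholder values standing in for the small config.
batch_size, num_steps, size, vocab_size = 20, 20, 200, 10000

model = Sequential()
model.add(Embedding(vocab_size, size, batch_input_shape=(batch_size, num_steps)))
model.add(LSTM(size, return_sequences=True, stateful=True))
model.add(LSTM(size, return_sequences=True, stateful=True))  # keep the full sequence
model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))
# Targets must then have shape (batch_size, num_steps, vocab_size).
```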
When I ran ptb_word_lm.py, I got this error:

```
Traceback (most recent call last):
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 155, in <module>
    main()
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 128, in main
    model = get_model(epoch_size, config)
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 47, in get_model
    model.add(LSTM(return_sequences=True, **lstm_parameters))
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/lstm.py", line 562, in __init__
    super().__init__(
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/dropout_rnn_cell_mixin.py", line 43, in __init__
    super().__init__(*args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/base_rnn.py", line 271, in __init__
    super().__init__(**kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 3820, in __init__
    super().__init__(**kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 340, in __init__
    generic_utils.validate_kwargs(kwargs, allowed_kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/utils/generic_utils.py", line 514, in validate_kwargs
    raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'consume_less')
```
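That error is expected with Keras 2 and later: several of the Keras 1 constructor arguments used in `lstm_parameters` were renamed or dropped. A rough mapping (my reading of the Keras 2 API, not part of this repo; `size` and `init_fn` below are placeholders for `config.hidden_size` and the custom uniform initializer):

```python
# Hypothetical Keras 2 equivalents of the Keras 1 kwargs used in get_model().
size = 200           # placeholder for config.hidden_size
init_fn = 'uniform'  # placeholder for the custom uniform(init_scale) initializer

lstm_parameters_keras2 = {
    'units': size,                     # Keras 1: output_dim
    'kernel_initializer': init_fn,     # Keras 1: init
    'recurrent_initializer': init_fn,  # Keras 1: inner_init
    'unit_forget_bias': False,         # roughly replaces forget_bias_init='zero'
    'stateful': True,
    'implementation': 2,               # Keras 1: consume_less='gpu'
}
```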
@p-baleine Hello. Thanks for posting this implementation in Keras. I can't replicate your results for the metrics; the perplexity numbers I'm getting are way different. Maybe this needs a custom Keras loss matching `tf.contrib.seq2seq.sequence_loss` from the original TensorFlow implementation.
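For what it's worth, with uniform weights `tf.contrib.seq2seq.sequence_loss` reduces to per-timestep softmax cross-entropy averaged over batch and time. A minimal Keras-backend sketch of that reduction follows; it assumes a model that outputs a softmax at every timestep and one-hot targets of the same shape, which is not how the repo's model is currently wired (its last LSTM has `return_sequences=False`). The function name is mine:

```python
from keras import backend as K

def sequence_loss_sketch(y_true, y_pred):
    # Sketch of sequence_loss with uniform weights.
    # Assumes y_pred: (batch, num_steps, vocab) softmax outputs,
    #         y_true: one-hot targets of the same shape.
    eps = K.epsilon()
    y_pred = K.clip(y_pred, eps, 1.0 - eps)
    ce = -K.sum(y_true * K.log(y_pred), axis=-1)  # cross-entropy per (batch, step)
    return K.mean(ce)                             # average over batch and time
```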
I ran it with the TensorFlow backend (I tried Theano and was getting the same thing though). I'm getting these numbers:
When testing, I got a data out of bounds error.
Are you still able to run this code with Keras 2.0.8 (or at least some recent version of Keras) and TF 1.2.1 or above? I had to change a few minor things to get it to work because of renamed parameters, and change this:
Using CuDNNLSTM (in Keras 2.0.9), this runs significantly faster.
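For anyone trying that, here is a minimal sketch of the swap under Keras 2.0.9+ (GPU with cuDNN required). It reproduces only the layer topology, not the custom initializers or dropout placement, and uses the medium-config numbers as placeholders:

```python
from keras.models import Sequential
from keras.layers import Embedding, CuDNNLSTM, Dense, Activation

batch_size, num_steps, size, vocab_size = 20, 35, 650, 10000  # medium config

model = Sequential()
model.add(Embedding(vocab_size, size, batch_input_shape=(batch_size, num_steps)))
model.add(CuDNNLSTM(size, return_sequences=True, stateful=True))
model.add(CuDNNLSTM(size, return_sequences=False, stateful=True))
model.add(Dense(vocab_size))
model.add(Activation('softmax'))
```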