https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/ptb_word_lm.py
| config | epochs | train | valid | test |
|---|---|---|---|---|
| small | 13 | 71.43 | 158.90 | 148.77 |
| medium | 39 | 82.01 | 134.07 | 125.39 |
| large |
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/ptb_word_lm.py
| config | epochs | train | valid | test |
|---|---|---|---|---|
| small | 13 | 71.43 | 158.90 | 148.77 |
| medium | 39 | 82.01 | 134.07 | 125.39 |
| large |
class SmallConfig(object):
    """Hyperparameters selected by the ``small`` model size."""

    # Network shape.
    vocab_size = 10000
    hidden_size = 200
    num_layers = 2
    num_steps = 20
    batch_size = 20

    # Weight initialisation range and dropout keep probability.
    init_scale = 0.1
    keep_prob = 1.0

    # Optimisation schedule.
    learning_rate = 1.0
    lr_decay = 0.5
    max_epoch = 4
    max_max_epoch = 13
    max_grad_norm = 5
class MediumConfig(object):
    """Hyperparameters selected by the ``medium`` model size."""

    # Network shape.
    vocab_size = 10000
    hidden_size = 650
    num_layers = 2
    num_steps = 35
    batch_size = 20

    # Weight initialisation range and dropout keep probability.
    init_scale = 0.05
    keep_prob = 0.5

    # Optimisation schedule.
    learning_rate = 1.0
    lr_decay = 0.8
    max_epoch = 6
    max_max_epoch = 39
    max_grad_norm = 5
class LargeConfig(object):
    """Hyperparameters selected by the ``large`` model size."""

    # Network shape.
    vocab_size = 10000
    hidden_size = 1500
    num_layers = 2
    num_steps = 35
    batch_size = 20

    # Weight initialisation range and dropout keep probability.
    init_scale = 0.04
    keep_prob = 0.35

    # Optimisation schedule.
    learning_rate = 1.0
    lr_decay = 1 / 1.15
    max_epoch = 14
    max_max_epoch = 55
    max_grad_norm = 10
class TestConfig(object):
    """Deliberately tiny hyperparameters, meant for quick test runs."""

    # Network shape.
    vocab_size = 10000
    hidden_size = 2
    num_layers = 1
    num_steps = 2
    batch_size = 20

    # Weight initialisation range and dropout keep probability.
    init_scale = 0.1
    keep_prob = 1.0

    # Optimisation schedule.
    learning_rate = 1.0
    lr_decay = 0.5
    max_epoch = 1
    max_max_epoch = 1
    max_grad_norm = 1
def get_config(model):
    """Return a fresh configuration object for the named model size.

    Args:
        model: One of ``'small'``, ``'medium'``, ``'large'`` or ``'test'``.

    Returns:
        A new instance of the matching ``*Config`` class.

    Raises:
        ValueError: If ``model`` is not one of the known names.
    """
    known_configs = (
        ('small', SmallConfig),
        ('medium', MediumConfig),
        ('large', LargeConfig),
        ('test', TestConfig),
    )
    for name, config_class in known_configs:
        if model == name:
            return config_class()
    raise ValueError('Invalid model: {}'.format(model))
# These targets name actions, not files; always run them when requested.
.PHONY: train-theano train-tensorflow clean

# Train with the Theano backend on the GPU.
train-theano: data/simple-examples checkpoints
	THEANO_FLAGS=device=gpu,floatX=float32,lib.cnmem=1 KERAS_BACKEND=theano \
	python ptb_word_lm.py

# Train with the TensorFlow backend.
train-tensorflow: data/simple-examples checkpoints
	KERAS_BACKEND=tensorflow python ptb_word_lm.py

# Unpack the corpus. tar restores the archived timestamps, so touch the
# directory afterwards; otherwise it can look older than the tarball and be
# re-extracted on every run.
data/simple-examples: data/simple-examples.tgz
	tar zxvf data/simple-examples.tgz -C data/
	touch $@

# `data` is an order-only prerequisite: the directory must exist, but its
# modification time (which changes whenever a file is added to it) must not
# trigger a fresh download.
data/simple-examples.tgz: | data
	wget -P data http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

data:
	mkdir data

checkpoints:
	mkdir checkpoints

# Removes the downloaded corpus only; trained checkpoints are kept.
clean:
	rm -rf data
| from keras import backend as K | |
| from keras.optimizers import Optimizer | |
| import numpy as np | |
class PtbSGD(Optimizer):
    """Plain SGD whose learning rate decays per epoch after a warm-up.

    The effective learning rate is
    ``base_lr * decay ** max(epoch - max_epoch, 0)`` with
    ``epoch = iterations // epoch_size``; it stays at ``lr`` for the first
    ``max_epoch`` epochs and is multiplied by ``decay`` once per epoch after
    that.

    Args:
        lr: Initial (base) learning rate.
        decay: Multiplicative decay applied per epoch after ``max_epoch``.
        epoch_size: Number of parameter updates that make up one epoch.
        max_epoch: Number of epochs trained at the undecayed rate.
        **kwargs: Forwarded to ``keras.optimizers.Optimizer``
            (the callers in this project pass ``clipnorm``).
    """

    def __init__(self, lr=1.0, decay=.5, epoch_size=1000,
                 max_epoch=4, **kwargs):
        super(PtbSGD, self).__init__(**kwargs)
        # Every hyperparameter lives in a backend variable so that it can be
        # used inside the symbolic update expressions below.
        self.iterations = K.variable(0.)
        self.base_lr = K.variable(lr)
        self.lr = K.variable(lr)
        self.decay = K.variable(decay)
        self.epoch_size = K.variable(epoch_size)
        self.max_epoch = K.variable(max_epoch)

    def get_updates(self, params, constraints, loss):
        """Build the list of ``(variable, new_value)`` update pairs.

        Args:
            params: Trainable variables to update.
            constraints: Mapping from a parameter to the constraint to apply
                to its updated value (same convention as the stock Keras 1.x
                optimizers).
            loss: Symbolic loss to differentiate.

        Returns:
            The update list, also stored on ``self.updates``.
        """
        grads = self.get_gradients(loss, params)
        epoch = self.iterations // self.epoch_size
        # The exponent is zero until `max_epoch` epochs have elapsed.
        decay = K.pow(self.decay, K.maximum(epoch - self.max_epoch, 0.))
        self.lr = self.base_lr * decay
        self.updates = [(self.iterations, self.iterations + 1.)]
        for p, g in zip(params, grads):
            new_p = p - self.lr * g
            # Honour per-parameter constraints instead of silently dropping
            # them.
            if p in constraints:
                new_p = constraints[p](new_p)
            self.updates.append((p, new_p))
        return self.updates

    def get_config(self):
        """Return the optimizer hyperparameters as a plain dict.

        NOTE(review): the learning rate is reported under ``'base_lr'``
        while the constructor argument is named ``lr`` - confirm before
        relying on a config round trip.
        """
        config = {'base_lr': float(K.get_value(self.base_lr)),
                  'decay': float(K.get_value(self.decay)),
                  'epoch_size': float(K.get_value(self.epoch_size)),
                  'max_epoch': float(K.get_value(self.max_epoch))}
        base_config = super(PtbSGD, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def get_lr(self):
        """Evaluate and return the current learning rate.

        Uses ``K.eval`` so that the call does not depend on the
        backend-specific ``.eval()`` of the underlying tensor.
        """
        return K.eval(self.lr)
| from keras import backend as K | |
| from keras.layers import Dense, Activation, Dropout, LSTM | |
| from keras.layers.embeddings import Embedding | |
| from keras.models import Sequential | |
| from keras.utils.np_utils import to_categorical | |
| from keras.optimizers import SGD | |
| from optimizer import PtbSGD | |
| from config import get_config | |
| import click | |
| import numpy as np | |
| import reader | |
| import time | |
| import pickle | |
def get_model(epoch_size, config):
    """Return the compiled PTB model.

    The network is an embedding followed by ``config.num_layers`` stateful
    LSTM layers (only the last one drops the time dimension), a dense
    projection onto the vocabulary and a softmax. Dropout with rate
    ``1 - config.keep_prob`` is inserted after the embedding and after every
    LSTM when ``config.keep_prob < 1``.

    Args:
        epoch_size: Number of batches per epoch; passed to ``PtbSGD`` so it
            can derive the current epoch from its iteration counter.
        config: Configuration object as returned by ``get_config``.

    Returns:
        A compiled ``Sequential`` model trained with categorical
        cross-entropy.
    """
    size = config.hidden_size
    vocab_size = config.vocab_size
    keep_prob = config.keep_prob

    lstm_parameters = {
        "output_dim": size,
        "init": uniform(config.init_scale),
        "inner_init": uniform(config.init_scale),
        "forget_bias_init": "zero",
        "stateful": True,
        "consume_less": "gpu",
    }

    model = Sequential()

    def add_dropout():
        # Dropout is skipped entirely when every unit is kept.
        if keep_prob < 1:
            model.add(Dropout(1 - keep_prob))

    model.add(Embedding(vocab_size, size,
                        batch_input_shape=(config.batch_size,
                                           config.num_steps)))
    add_dropout()
    for _ in range(config.num_layers - 1):
        model.add(LSTM(return_sequences=True, **lstm_parameters))
        add_dropout()
    model.add(LSTM(return_sequences=False, **lstm_parameters))
    add_dropout()
    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))

    # NOTE (translated from the author's original comment): with lr = 1 the
    # perplexity diverges almost immediately when the network is large. The
    # alternative that was tried is the stock optimizer:
    #     SGD(lr=learning_rate, clipnorm=max_grad_norm)
    optimizer = PtbSGD(lr=config.learning_rate, decay=config.lr_decay,
                       clipnorm=config.max_grad_norm,
                       epoch_size=epoch_size,
                       max_epoch=config.max_epoch)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    return model
def run_epoch(data, model, batch_size, num_steps, vocab_size):
    """Train the model for one pass over ``data`` and return the perplexity.

    Args:
        data: Sequence of word ids.
        model: Compiled stateful Keras model; its states are reset first.
        batch_size: Number of parallel sequences per batch.
        num_steps: Number of time steps per batch.
        vocab_size: Number of classes used to encode the targets.

    Returns:
        ``exp(total_loss / total_steps)`` accumulated over the epoch.

    Raises:
        ValueError: If ``data`` is too short to produce a single batch.
    """
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    # `epoch_size // 10` is 0 for fewer than ten batches, which used to make
    # the modulo below raise ZeroDivisionError.
    report_interval = max(epoch_size // 10, 1)
    start_time = time.time()
    losses = 0.0
    iters = 0
    model.reset_states()
    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.train_on_batch(x, y)
        losses += loss
        iters += num_steps
        # print(model.optimizer.get_lr())
        print(np.exp(losses / iters))
        if step % report_interval == 10:
            print('{:.3f} perplexity: {:.3f} speed: {:.0f} wps'.format(
                step * 1.0 / epoch_size, np.exp(losses / iters),
                iters * batch_size / (time.time() - start_time)
            ))
    if iters == 0:
        # Without this check the final division fails with an opaque
        # ZeroDivisionError.
        raise ValueError(
            'data is too short for batch_size={} and num_steps={}'.format(
                batch_size, num_steps))
    return np.exp(losses / iters)
def run_test_epoch(data, model, batch_size, num_steps, vocab_size):
    """Evaluate the model on ``data`` without training; return perplexity.

    Args:
        data: Sequence of word ids.
        model: Compiled stateful Keras model; its states are reset first.
        batch_size: Number of parallel sequences per batch.
        num_steps: Number of time steps per batch.
        vocab_size: Number of classes used to encode the targets.

    Returns:
        ``exp(total_loss / total_steps)`` accumulated over all batches.

    Raises:
        ValueError: If ``data`` is too short to produce a single batch.
    """
    losses = 0.0
    iters = 0
    model.reset_states()
    for x, y in reader.ptb_iterator(data, batch_size, num_steps):
        y = to_categorical(y, nb_classes=vocab_size)
        losses += model.test_on_batch(x, y)
        iters += num_steps
    if iters == 0:
        # Without this check the final division fails with an opaque
        # ZeroDivisionError.
        raise ValueError(
            'data is too short for batch_size={} and num_steps={}'.format(
                batch_size, num_steps))
    return np.exp(losses / iters)
def uniform(scale=0.05):
    """Build a weight initializer sampling uniformly from [-scale, scale).

    Args:
        scale: Half-width of the sampling interval.

    Returns:
        A function ``init(shape, name=None)`` that returns a backend
        variable of the given shape filled with the sampled values.
    """
    def init(shape, name=None):
        initial_values = np.random.uniform(low=-scale, high=scale, size=shape)
        return K.variable(initial_values, name=name)

    return init
# Training entry point: loads the PTB data, builds the model for the chosen
# size, stores the vocabulary and architecture, then trains for
# `max_max_epoch` epochs, saving the weights after every epoch, and finally
# reports the test perplexity.
# (Documented with comments rather than a docstring so that the text click
# shows for --help stays unchanged.)
@click.command()
@click.option('--size', default='small')
@click.option('--data_path', default='data/simple-examples/data')
def main(size, data_path):
    (word_to_id, id_to_word,
     train_data, valid_data, test_data) = reader.ptb_raw_data(data_path)

    config = get_config(size)
    # Positional arguments shared by every run_epoch / run_test_epoch call.
    epoch_args = (config.batch_size, config.num_steps, config.vocab_size)

    batches_per_sequence = len(train_data) // config.batch_size
    epoch_size = (batches_per_sequence - 1) // config.num_steps
    model = get_model(epoch_size, config)

    # The vocabulary is stored so the sampling script can map words to ids.
    with open('vocab.bin', 'wb') as vocab_file:
        pickle.dump(word_to_id, vocab_file)

    print('Training with {} size'.format(size))

    architecture_path = 'checkpoints/prb_word_lm_{}_architecture.json'.format(size)
    with open(architecture_path, 'w') as architecture_file:
        architecture_file.write(model.to_json())

    # train
    for epoch in range(config.max_max_epoch):
        epoch_number = epoch + 1
        train_perplexity = run_epoch(train_data, model, *epoch_args)
        print('Epoch: {} Train Perplexity: {:.3f}'.format(
            epoch_number, train_perplexity))
        valid_perplexity = run_test_epoch(valid_data, model, *epoch_args)
        print('Epoch: {} Valid Perplexity: {:.3f}'.format(
            epoch_number, valid_perplexity))
        print('save weights ...')
        weights_path = 'checkpoints/prb_word_lm_{}_{}_{}.h5'.format(
            size, epoch, valid_perplexity)
        model.save_weights(weights_path)

    test_perplexity = run_test_epoch(test_data, model, *epoch_args)
    print('Test Perplexity: {:.3f}'.format(test_perplexity))


if __name__ == '__main__':
    main()
| from collections import Counter | |
| import numpy as np | |
| import os | |
def _read_words(filename):
    """Read ``filename`` and return its whitespace-separated tokens.

    Every newline character is replaced by the text ``<eos>`` before
    splitting.
    """
    with open(filename) as handle:
        text = handle.read()
    return text.replace('\n', '<eos>').split()
def _build_vocab(filename):
    """Build the vocabulary mappings from the words in ``filename``.

    Words are numbered from 0 in order of decreasing frequency; words with
    equal frequency are ordered alphabetically.

    Returns:
        A ``(word_to_id, id_to_word)`` pair of dicts.
    """
    frequencies = Counter(_read_words(filename))
    ranked = sorted(frequencies.items(),
                    key=lambda pair: (-pair[1], pair[0]))
    vocabulary, _counts = zip(*ranked)
    word_to_id = {word: index for index, word in enumerate(vocabulary)}
    id_to_word = {index: word for word, index in word_to_id.items()}
    return word_to_id, id_to_word
def _file_to_word_ids(filename, word_to_id):
    """Return the words of ``filename`` translated to their integer ids.

    Raises:
        KeyError: If a word in the file is missing from ``word_to_id``.
    """
    lookup = word_to_id.__getitem__
    return list(map(lookup, _read_words(filename)))
def ptb_raw_data(data_path):
    """Load the PTB train/valid/test files found in ``data_path``.

    The vocabulary is built from the training file only and is then used to
    encode all three files.

    Returns:
        ``(word_to_id, id_to_word, train_data, valid_data, test_data)``
        where the three data items are lists of word ids.
    """
    train_path, valid_path, test_path = (
        os.path.join(data_path, 'ptb.{}.txt'.format(split))
        for split in ('train', 'valid', 'test')
    )

    word_to_id, id_to_word = _build_vocab(train_path)
    train_data, valid_data, test_data = [
        _file_to_word_ids(path, word_to_id)
        for path in (train_path, valid_path, test_path)
    ]
    return word_to_id, id_to_word, train_data, valid_data, test_data
def ptb_iterator(raw_data, batch_size, num_steps):
    """Yield ``(x, y)`` batches from a flat sequence of word ids.

    The sequence is cut into ``batch_size`` consecutive rows of equal length
    (trailing ids that do not fill a row are dropped). Each batch is a window
    of ``num_steps`` columns; ``y`` is the same window shifted one position
    to the right.

    Args:
        raw_data: Sequence of integer word ids.
        batch_size: Number of rows in every batch.
        num_steps: Number of columns in every batch.

    Yields:
        Pairs of int32 arrays, both of shape ``(batch_size, num_steps)``.
    """
    ids = np.array(raw_data, dtype=np.int32)
    batch_len = len(ids) // batch_size
    # Row r holds ids[r * batch_len:(r + 1) * batch_len].
    rows = ids[:batch_size * batch_len].reshape(batch_size, batch_len)

    epoch_size = (batch_len - 1) // num_steps
    for start in range(0, epoch_size * num_steps, num_steps):
        stop = start + num_steps
        yield (rows[:, start:stop], rows[:, start + 1:stop + 1])
| click==6.6 | |
| h5py==2.6.0 | |
| Keras==1.0.4 | |
| numpy==1.11.0 | |
| PyYAML==3.11 | |
| scipy==0.17.1 | |
| six==1.10.0 | |
| Theano==0.8.2 |
| from keras.models import model_from_json | |
| from keras import initializations | |
| from optimizer import PtbSGD | |
| from config import get_config | |
| from ptb_word_lm import uniform | |
| from keras.preprocessing.sequence import pad_sequences | |
| import numpy as np | |
| import click | |
| import reader | |
| import pickle | |
| import sys | |
def sample(a, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    Args:
        a: One-dimensional array-like of probabilities.
        temperature: Divisor applied to the log-probabilities; values above 1
            flatten the distribution, values below 1 sharpen it.

    Returns:
        The sampled index.
    """
    # Work in float64: with float32 input the renormalised vector can sum to
    # slightly more than 1, which np.random.multinomial rejects.
    probabilities = np.asarray(a, dtype=np.float64)
    with np.errstate(divide='ignore'):
        # log(0) = -inf is fine here: it maps back to probability 0.
        logits = np.log(probabilities) / temperature
    # Subtracting the maximum leaves the softmax unchanged but keeps exp()
    # from overflowing at low temperatures.
    logits = logits - np.max(logits)
    weights = np.exp(logits)
    probabilities = weights / np.sum(weights)
    return np.argmax(np.random.multinomial(1, probabilities, 1))
# Sampling entry point: restores a trained model from its JSON architecture
# and weight file, then extends `seed_text` by `length` sampled words,
# printing them as they are produced ("<eos>" is shown as ".").
# (Documented with comments rather than a docstring so that the text click
# shows for --help stays unchanged.)
@click.command()
@click.option('--size', default='small')
@click.option('--architecture')
@click.option('--weights')
@click.option('--seed_text', default='nonexecutive director of')
@click.option('--length', default=20)
def main(size, architecture, weights, seed_text, length):
    config = get_config(size)

    # NOTE(review): presumably the saved architecture refers to the custom
    # initializer by its function name ("init"), so it is installed on the
    # keras.initializations module before loading - confirm.
    initializations.init = uniform(config.init_scale)
    # custom_objects = {'init': uniform(config.init_scale)}

    with open(architecture) as f:
        # model = model_from_json(f.read(), custom_objects)
        model = model_from_json(f.read())

    model.load_weights(weights)

    optimizer = PtbSGD(lr=config.learning_rate,
                       decay=config.lr_decay,
                       clipnorm=config.max_grad_norm,
                       epoch_size=10,  # dummy
                       max_epoch=config.max_epoch)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    # NOTE: pickle must only be used on trusted files; vocab.bin is the file
    # written by the training script.
    with open('vocab.bin', 'rb') as f:
        word_to_id = pickle.load(f)

    id_to_word = {index: word for word, index in word_to_id.items()}

    sys.stdout.write(seed_text + ' ')

    sentence = [word_to_id[word] for word in seed_text.split(' ')]

    for _ in range(length):
        # TODO: it should not be necessary to feed batch_size copies of the
        # same sentence.
        batch = pad_sequences([sentence] * config.batch_size,
                              maxlen=config.num_steps)
        preds = model.predict(batch)[0]
        next_index = int(sample(preds, 1.5))
        next_word = id_to_word[next_index]

        # Slide the context window. The index has to be wrapped in a list:
        # `list + int` raises TypeError.
        sentence = sentence[1:] + [next_index]

        sys.stdout.write((next_word if next_word != '<eos>' else '.') + ' ')
        sys.stdout.flush()

    print()


if __name__ == '__main__':
    main()
Greetings,
please correct me if I am wrong: line 51 in ptb_word_lm.py, `model.add(LSTM(return_sequences=False, **lstm_parameters))`, should be
model.add(LSTM(return_sequences=True, **lstm_parameters))
When I ran ptb_word_lm.py, there is an error:
Traceback (most recent call last):
File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 155, in
main()
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1130, in call
return self.main(*args, **kwargs)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 128, in main
model = get_model(epoch_size, config)
File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 47, in get_model
model.add(LSTM(return_sequences=True, **lstm_parameters))
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/lstm.py", line 562, in init
super().init(
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/dropout_rnn_cell_mixin.py", line 43, in init
super().init(*args, **kwargs)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/base_rnn.py", line 271, in init
super().init(**kwargs)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
result = method(self, *args, **kwargs)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 3820, in init
super().init(**kwargs)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
result = method(self, *args, **kwargs)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 340, in init
generic_utils.validate_kwargs(kwargs, allowed_kwargs)
File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/utils/generic_utils.py", line 514, in validate_kwargs
raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'consume_less')
@p-baleine Hello. Thanks for posting this implementation in Keras. I can't replicate your results for the metrics. The perplexity numbers I'm getting are way different. Maybe this needs a custom Keras layer for
`tf.contrib.seq2seq.sequence_loss`, per the original TensorFlow implementation. I ran it with the TensorFlow backend (I tried Theano and was getting the same thing, though). I'm getting these numbers:
On the test set I got a data-out-of-bounds error.
Are you able to still run this code with Keras 2.0.8 (or at least some recent version of Keras) and TF 1.2.1 or above? I had to change a few minor things to get it to work because of parameters being renamed and change this:
Using CuDNNLSTM (in Keras 2.0.9) this runs significantly faster.