
@p-baleine
Last active May 16, 2023 12:28
TensorFlow's PTB LSTM model for Keras
# config.py
class SmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 20
    hidden_size = 200
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000


class MediumConfig(object):
    """Medium config."""
    init_scale = 0.05
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 35
    hidden_size = 650
    max_epoch = 6
    max_max_epoch = 39
    keep_prob = 0.5
    lr_decay = 0.8
    batch_size = 20
    vocab_size = 10000


class LargeConfig(object):
    """Large config."""
    init_scale = 0.04
    learning_rate = 1.0
    max_grad_norm = 10
    num_layers = 2
    num_steps = 35
    hidden_size = 1500
    max_epoch = 14
    max_max_epoch = 55
    keep_prob = 0.35
    lr_decay = 1 / 1.15
    batch_size = 20
    vocab_size = 10000


class TestConfig(object):
    """Tiny config, for testing."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 1
    num_layers = 1
    num_steps = 2
    hidden_size = 2
    max_epoch = 1
    max_max_epoch = 1
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000


def get_config(model):
    if model == 'small':
        return SmallConfig()
    elif model == 'medium':
        return MediumConfig()
    elif model == 'large':
        return LargeConfig()
    elif model == 'test':
        return TestConfig()
    else:
        raise ValueError('Invalid model: {}'.format(model))
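For orientation (this snippet is illustrative, not part of the gist): the training and sampling scripts below select one of these presets by name via get_config.

from config import get_config

config = get_config('medium')   # raises ValueError for an unknown name
print(config.hidden_size, config.num_steps, config.batch_size)
# 650 35 20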
Notebook (Python 3.5 kernel): load a trained checkpoint and sample from it interactively.

In [1]:
from keras.models import model_from_json
from keras import initializations
from optimizer import PtbSGD
from config import get_config
from ptb_word_lm import uniform
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import click
import reader
import pickle
import sys

def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))

size = 'medium'
architecture = 'checkpoints/prb_word_lm_medium_architecture.json'
weights = 'checkpoints/prb_word_lm_medium_27_132.82246700553142.h5'
length = 20

config = get_config(size)
initializations.init = uniform(config.init_scale)

with open(architecture) as f:
    model = model_from_json(f.read())

model.load_weights(weights)

optimizer = PtbSGD(lr=config.learning_rate,
                   decay=config.lr_decay,
                   clipnorm=config.max_grad_norm,
                   epoch_size=10,  # dummy
                   max_epoch=config.max_epoch)

model.compile(loss='categorical_crossentropy', optimizer=optimizer)

with open('vocab.bin', 'rb') as f:
    word_to_id = pickle.load(f)
id_to_word = {}
for c, i in word_to_id.items():
    id_to_word[i] = c

(stderr) Using Theano backend.

In [4]:
def predict(seed_text):
    sys.stdout.write(seed_text + ' ')
    sentence = [word_to_id[word] for word in seed_text.split(' ')]

    for i in range(length):
        preds = model.predict(pad_sequences([sentence] * config.batch_size,
                                            maxlen=config.num_steps))[0]
        next_index = sample(preds, 1.5)
        next_word = id_to_word[next_index]
        sentence = sentence[1:] + next_index

        sys.stdout.write((next_word if next_word != '<eos>' else '.') + ' ')
        sys.stdout.flush()

In [5]:
from ipywidgets import widgets
from IPython.display import display

text = widgets.Text()
display(text)

def handle_submit(sender):
    predict(text.value)

text.on_submit(handle_submit)

(stdout) cigarettes with schwarz somewhat in leased a caller towers blacks they speaker reach 's <unk> . thomas that presents while 's .
# Makefile
train-theano: data/simple-examples checkpoints
	THEANO_FLAGS=device=gpu,floatX=float32,lib.cnmem=1 KERAS_BACKEND=theano \
	python ptb_word_lm.py

train-tensorflow: data/simple-examples checkpoints
	KERAS_BACKEND=tensorflow python ptb_word_lm.py

data/simple-examples: data/simple-examples.tgz
	tar zxvf data/simple-examples.tgz -C data/

data/simple-examples.tgz: data
	wget -P data http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

data:
	mkdir data

checkpoints:
	mkdir checkpoints

clean:
	rm -rf data
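Typical usage, as implied by the targets above: running make train-theano (or make train-tensorflow) first builds the data/simple-examples and checkpoints prerequisites, downloading and unpacking the PTB data with wget and tar, and then launches ptb_word_lm.py with the corresponding KERAS_BACKEND; make clean removes the downloaded data.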
# optimizer.py
from keras import backend as K
from keras.optimizers import Optimizer
import numpy as np


class PtbSGD(Optimizer):
    """SGD with the PTB learning-rate schedule: the learning rate stays at
    `lr` while the epoch index (iterations // epoch_size) is at most
    `max_epoch`, and is multiplied by `decay` for every epoch beyond that.
    """

    def __init__(self, lr=1.0, decay=.5, epoch_size=1000,
                 max_epoch=4, **kwargs):
        super(PtbSGD, self).__init__(**kwargs)
        self.__dict__.update(locals())
        self.iterations = K.variable(0.)
        self.base_lr = K.variable(lr)
        self.lr = K.variable(lr)
        self.decay = K.variable(decay)
        self.epoch_size = K.variable(epoch_size)
        self.max_epoch = K.variable(max_epoch)

    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        epoch = self.iterations // self.epoch_size
        decay = K.pow(self.decay, K.switch(epoch - self.max_epoch > 0.,
                                           epoch - self.max_epoch,
                                           K.variable(0.)))
        self.lr = self.base_lr * decay
        self.updates = [(self.iterations, self.iterations + 1.)]
        for p, g in zip(params, grads):
            self.updates.append((p, p - self.lr * g))
        return self.updates

    def get_config(self):
        config = {'base_lr': float(K.get_value(self.base_lr)),
                  'decay': float(K.get_value(self.decay)),
                  'epoch_size': float(K.get_value(self.epoch_size)),
                  'max_epoch': float(K.get_value(self.max_epoch))}
        base_config = super(PtbSGD, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def get_lr(self):
        return self.lr.eval()
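As a sanity check of the schedule (illustrative only; plain Python rather than Keras variables), the same decay rule evaluated with SmallConfig-style settings looks like this:

# decay rule from PtbSGD.get_updates, for lr=1.0, lr_decay=0.5, max_epoch=4
base_lr, decay, max_epoch = 1.0, 0.5, 4
for epoch in range(8):
    lr = base_lr * decay ** max(epoch - max_epoch, 0)
    print(epoch, lr)
# epochs 0-4 keep lr = 1.0; epoch 5 -> 0.5, epoch 6 -> 0.25, epoch 7 -> 0.125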
# ptb_word_lm.py
from keras import backend as K
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.utils.np_utils import to_categorical
from keras.optimizers import SGD
from optimizer import PtbSGD
from config import get_config
import click
import numpy as np
import reader
import time
import pickle


def get_model(epoch_size, config):
    """Return the PTB model."""
    batch_size = config.batch_size
    num_steps = config.num_steps
    num_layers = config.num_layers
    size = config.hidden_size
    vocab_size = config.vocab_size
    learning_rate = config.learning_rate
    lr_decay = config.lr_decay
    keep_prob = config.keep_prob
    max_grad_norm = config.max_grad_norm
    max_epoch = config.max_epoch
    max_max_epoch = config.max_max_epoch

    lstm_parameters = {
        "output_dim": size,
        "init": uniform(config.init_scale),
        "inner_init": uniform(config.init_scale),
        "forget_bias_init": "zero",
        "stateful": True,
        "consume_less": "gpu"
    }

    model = Sequential()
    model.add(Embedding(vocab_size, size,
                        batch_input_shape=(batch_size, num_steps)))
    if keep_prob < 1:
        model.add(Dropout(1 - keep_prob))
    for i in range(num_layers - 1):
        model.add(LSTM(return_sequences=True, **lstm_parameters))
        if keep_prob < 1:
            model.add(Dropout(1 - keep_prob))
    model.add(LSTM(return_sequences=False, **lstm_parameters))
    if keep_prob < 1:
        model.add(Dropout(1 - keep_prob))
    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))

    optimizer = PtbSGD(lr=learning_rate, decay=lr_decay,
                       clipnorm=max_grad_norm,
                       epoch_size=epoch_size,
                       max_epoch=max_epoch)
    # With lr=1 the perplexity diverges almost immediately when the network is large?
    # optimizer = SGD(lr=learning_rate, clipnorm=max_grad_norm)

    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model
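# For reference (a sketch inferred from the layer arguments above, not from a
# recorded run): with SmallConfig (batch_size=20, num_steps=20,
# hidden_size=200, vocab_size=10000) the tensor shapes are roughly
#   input x                       -> (20, 20)        integer word ids
#   Embedding                     -> (20, 20, 200)
#   LSTM(return_sequences=True)   -> (20, 20, 200)
#   LSTM(return_sequences=False)  -> (20, 200)        last timestep only
#   Dense(10000) + softmax        -> (20, 10000)      next-word distribution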
def run_epoch(data, model, batch_size, num_steps, vocab_size):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    start_time = time.time()
    losses = 0.0
    iters = 0

    model.reset_states()

    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.train_on_batch(x, y)
        losses += loss
        iters += num_steps

        # print(model.optimizer.get_lr())
        print(np.exp(losses / iters))

        if step % (epoch_size // 10) == 10:
            print('{:.3f} perplexity: {:.3f} speed: {:.0f} wps'.format(
                step * 1.0 / epoch_size, np.exp(losses / iters),
                iters * batch_size / (time.time() - start_time)
            ))

    return np.exp(losses / iters)


def run_test_epoch(data, model, batch_size, num_steps, vocab_size):
    """Tests the model on the given data."""
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    losses = 0.0
    iters = 0

    model.reset_states()

    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        y = to_categorical(y, nb_classes=vocab_size)
        loss = model.test_on_batch(x, y)
        losses += loss
        iters += num_steps

    return np.exp(losses / iters)


def uniform(scale=0.05):
    def init(shape, name=None):
        return K.variable(np.random.uniform(low=-scale, high=scale, size=shape),
                          name=name)
    return init


@click.command()
@click.option('--size', default='small')
@click.option('--data_path', default='data/simple-examples/data')
def main(size, data_path):
    raw_data = reader.ptb_raw_data(data_path)
    word_to_id, id_to_word, train_data, valid_data, test_data = raw_data

    config = get_config(size)
    batch_size = config.batch_size
    num_steps = config.num_steps
    vocab_size = config.vocab_size
    epoch_size = ((len(train_data) // config.batch_size) - 1) // config.num_steps

    model = get_model(epoch_size, config)

    with open('vocab.bin', 'wb') as f:
        pickle.dump(word_to_id, f)

    print('Training with {} size'.format(size))

    with open('checkpoints/prb_word_lm_{}_architecture.json'.format(size), 'w') as f:
        f.write(model.to_json())

    # train
    for i in range(config.max_max_epoch):
        # print("Epoch: {} Learning rate: {}".format(i + 1, model.optimizer.get_lr()))
        train_perplexity = run_epoch(train_data, model, batch_size, num_steps, vocab_size)
        print('Epoch: {} Train Perplexity: {:.3f}'.format(
            i + 1, train_perplexity))
        valid_perplexity = run_test_epoch(valid_data, model, batch_size, num_steps, vocab_size)
        print('Epoch: {} Valid Perplexity: {:.3f}'.format(
            i + 1, valid_perplexity))

        print('save weights ...')
        model.save_weights('checkpoints/prb_word_lm_{}_{}_{}.h5'.format(
            size, i, valid_perplexity))

    test_perplexity = run_test_epoch(test_data, model, batch_size, num_steps, vocab_size)
    print('Test Perplexity: {:.3f}'.format(test_perplexity))


if __name__ == '__main__':
    main()
# reader.py
from collections import Counter
import numpy as np
import os


def _read_words(filename):
    with open(filename) as f:
        return f.read().replace('\n', '<eos>').split()


def _build_vocab(filename):
    data = _read_words(filename)
    counter = Counter(data)
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    words, _ = list(zip(*count_pairs))
    word_to_id = dict(zip(words, range(len(words))))
    id_to_word = dict((i, v) for v, i in word_to_id.items())
    return word_to_id, id_to_word


def _file_to_word_ids(filename, word_to_id):
    data = _read_words(filename)
    return [word_to_id[word] for word in data]


def ptb_raw_data(data_path):
    train_path = os.path.join(data_path, 'ptb.train.txt')
    valid_path = os.path.join(data_path, 'ptb.valid.txt')
    test_path = os.path.join(data_path, 'ptb.test.txt')

    word_to_id, id_to_word = _build_vocab(train_path)
    train_data = _file_to_word_ids(train_path, word_to_id)
    valid_data = _file_to_word_ids(valid_path, word_to_id)
    test_data = _file_to_word_ids(test_path, word_to_id)
    return word_to_id, id_to_word, train_data, valid_data, test_data


def ptb_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

    epoch_size = (batch_len - 1) // num_steps

    for i in range(epoch_size):
        x = data[:, i*num_steps:(i+1)*num_steps]
        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
        yield (x, y)
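A quick illustration (not part of the gist) of what ptb_iterator yields: the data is split into batch_size contiguous rows, and the targets are the inputs shifted one position to the right.

from reader import ptb_iterator   # assuming the module above is saved as reader.py

toy = list(range(13))             # 13 toy token ids
for x, y in ptb_iterator(toy, batch_size=2, num_steps=3):
    print(x)
    # [[0 1 2]
    #  [6 7 8]]
    print(y)
    # [[1 2 3]
    #  [7 8 9]]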
# requirements.txt
click==6.6
h5py==2.6.0
Keras==1.0.4
numpy==1.11.0
PyYAML==3.11
scipy==0.17.1
six==1.10.0
Theano==0.8.2
# sampling script (command-line counterpart of the notebook above)
from keras.models import model_from_json
from keras import initializations
from optimizer import PtbSGD
from config import get_config
from ptb_word_lm import uniform
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import click
import reader
import pickle
import sys


def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))


@click.command()
@click.option('--size', default='small')
@click.option('--architecture')
@click.option('--weights')
@click.option('--seed_text', default='nonexecutive director of')
@click.option('--length', default=20)
def main(size, architecture, weights, seed_text, length):
    config = get_config(size)
    initializations.init = uniform(config.init_scale)
    # custom_objects = {'init': uniform(config.init_scale)}

    with open(architecture) as f:
        # model = model_from_json(f.read(), custom_objects)
        model = model_from_json(f.read())

    model.load_weights(weights)

    optimizer = PtbSGD(lr=config.learning_rate,
                       decay=config.lr_decay,
                       clipnorm=config.max_grad_norm,
                       epoch_size=10,  # dummy
                       max_epoch=config.max_epoch)

    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    with open('vocab.bin', 'rb') as f:
        word_to_id = pickle.load(f)
    id_to_word = {}
    for c, i in word_to_id.items():
        id_to_word[i] = c

    sys.stdout.write(seed_text + ' ')
    sentence = [word_to_id[word] for word in seed_text.split(' ')]

    for i in range(length):
        # TODO: there should be no need to prepare batch_size copies of the input
        preds = model.predict(pad_sequences([sentence] * config.batch_size,
                                            maxlen=config.num_steps))[0]
        next_index = sample(preds, 1.5)
        next_word = id_to_word[next_index]
        # append the sampled id to the running context (wrapped in a list so it
        # extends the sequence instead of being broadcast-added by NumPy)
        sentence = sentence[1:] + [next_index]

        sys.stdout.write((next_word if next_word != '<eos>' else '.') + ' ')
        sys.stdout.flush()
    print()


if __name__ == '__main__':
    main()
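To make the role of the temperature argument in sample() concrete, here is a small standalone sketch with illustrative values: lower temperatures sharpen the distribution, higher ones flatten it.

import numpy as np

probs = np.array([0.7, 0.2, 0.1])          # toy next-word distribution
for t in (0.5, 1.0, 1.5):
    scaled = np.exp(np.log(probs) / t)     # same rescaling as in sample()
    scaled /= scaled.sum()
    print(t, scaled.round(2))
# 0.5 -> [0.91 0.07 0.02]   sharper: almost always the top word
# 1.0 -> [0.7  0.2  0.1 ]   unchanged
# 1.5 -> [0.59 0.25 0.16]   flatter: more variety (the value used above)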
avolkov1 commented Nov 5, 2017

@p-baleine Hello. Thanks for posting this implementation in Keras. I can't replicate your results for the metrics; the perplexity numbers I'm getting are way different. Maybe this needs a custom Keras layer for tf.contrib.seq2seq.sequence_loss, as in the original TensorFlow implementation:

        # Use the contrib sequence loss and average over the batches
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            input_.targets,
            tf.ones([self.batch_size, self.num_steps], dtype=data_type()),
            average_across_timesteps=False,
            average_across_batch=True)

I ran it with the TensorFlow backend (I tried Theano and got the same thing). I'm getting these numbers:

config  epochs  train  valid  test
small   13      1.228  1.359  1.363

On the test run I got a data out-of-bounds error.
Are you still able to run this code with Keras 2.0.8 (or at least some recent version of Keras) and TF 1.2.1 or above? I had to change a few minor things to get it to work because of renamed parameters, and I changed this:

    for i in range(epoch_size):
        x = data[:, i*num_steps:(i+1)*num_steps]
#        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
        y = data[:, (i+1)*num_steps]  # CHANGED TO LAST STEP??? OTHERWISE ERROR
        yield (x, y)

Using CuDNNLSTM (in Keras 2.0.9) this runs significantly faster.

hoangcuong2011 commented Nov 7, 2019

Greetings,

Please correct me if I am wrong: line 51 in ptb_word_lm.py (model.add(LSTM(return_sequences=False, **lstm_parameters))) should be

model.add(LSTM(return_sequences=True, **lstm_parameters))

Roy-NJU commented May 16, 2023

When I ran ptb_word_lm.py, I got this error:

Traceback (most recent call last):
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 155, in <module>
    main()
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 128, in main
    model = get_model(epoch_size, config)
  File "/Volumes/work/RNN-Models/Tensorflow's PTB LSTM model for keras/ptb_word_lm.py", line 47, in get_model
    model.add(LSTM(return_sequences=True, **lstm_parameters))
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/lstm.py", line 562, in __init__
    super().__init__(
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/dropout_rnn_cell_mixin.py", line 43, in __init__
    super().__init__(*args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/layers/rnn/base_rnn.py", line 271, in __init__
    super().__init__(**kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 3820, in __init__
    super().__init__(**kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/tensorflow/python/trackable/base.py", line 204, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 340, in __init__
    generic_utils.validate_kwargs(kwargs, allowed_kwargs)
  File "/Users/lugongzheng/miniforge3/envs/python38/lib/python3.8/site-packages/keras/src/utils/generic_utils.py", line 514, in validate_kwargs
    raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'consume_less')
