@wedesoft
Last active November 6, 2017 10:11
Long short-term memory (LSTM) TensorFlow implementation trained on Shakespeare's work
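The gist contains two scripts. The first is the sampler: it restores the saved './lstm' checkpoint and generates text one character at a time. The second is the training script: it defines the LSTM from scratch, trains it on a slice of Shakespeare, and writes that checkpoint. The training script therefore has to be run first.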
#!/usr/bin/env python3
import sys
import numpy as np
import tensorflow as tf
# Maps each character of the text to a one-hot vector and back.
class CharVec:
    def __init__(self, text):
        self.chars = np.array([ord(c) for c in sorted(set(text))])

    def vector(self, c):
        return np.where(self.chars == ord(c), 1, 0).astype(np.float32)

    def index(self, arr):
        return ''.join([chr(self.chars[i]) for i in arr])

    def __call__(self, x):
        return np.array([self.vector(c) for c in x])

    def __len__(self):
        return len(self.chars)
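# A quick sanity check of the encoding (hypothetical two-character text):
#   cv = CharVec('ba')
#   cv.vector('a')    # array([1., 0.], dtype=float32); characters are sorted
#   cv('ab').shape    # (2, 2): one one-hot row per input character
#   cv.index([0, 1])  # 'ab'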
def source_code():
    # http://www.gutenberg.org/cache/epub/100/pg100.txt
    # Return a roughly 100 KB slice of the Project Gutenberg text.
    with open('shakespeare.txt', 'r') as f:
        return f.read()[10462:113402]
if __name__ == '__main__':
    txt = source_code()
    char_vec = CharVec(txt)
    count = 50000       # number of characters to generate
    conservative = 1.3  # exponent > 1 sharpens the output distribution
    with tf.Session() as sess:
        # Restore the graph and weights written by the training script.
        saver = tf.train.import_meta_graph('lstm.meta')
        saver.restore(sess, 'lstm')
        prob = tf.get_collection('prob')[0]
        hnext = tf.get_collection('hnext')[0]
        cnext = tf.get_collection('cnext')[0]
        h = np.full(hnext.shape, 0.0, dtype=np.float32)
        c = np.full(cnext.shape, 0.0, dtype=np.float32)
        output = txt[0]
        for i in range(count):
            sys.stdout.write(output)
            context = {'x:0': char_vec(output), 'h:0': h, 'c:0': c}
            # One run yields the character distribution and the next recurrent state.
            prediction, h, c = sess.run([prob, hnext, cnext], feed_dict=context)
            prediction = prediction ** conservative
            # Inverse-CDF sampling: take the first index where the cumulative
            # sum exceeds a uniform draw scaled by the total mass.
            idx = np.argwhere(np.cumsum(prediction) >= np.sum(prediction) * np.random.rand())[0, 0]
            output = char_vec.index([idx])
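The sampler draws each character by inverse-CDF sampling: the network's output is raised to the power 'conservative' (an exponent above 1 favours high-probability characters), its cumulative sum is compared against a uniform draw scaled by the total mass, and the first index past that threshold is the sample. A minimal standalone sketch of the same trick, using a made-up three-character distribution:

import numpy as np

p = np.array([0.1, 0.2, 0.7])  # hypothetical character probabilities
sharpened = p ** 1.3           # same exponent as 'conservative' above
threshold = np.sum(sharpened) * np.random.rand()
idx = np.argwhere(np.cumsum(sharpened) >= threshold)[0, 0]
print(idx)                     # index 2 is drawn most often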
#!/usr/bin/env python3
import numpy as np
import tensorflow as tf
from tqdm import tqdm
# Same character/one-hot mapping as in the sampling script.
class CharVec:
    def __init__(self, text):
        self.chars = np.array([ord(c) for c in sorted(set(text))])

    def vector(self, c):
        return np.where(self.chars == ord(c), 1, 0).astype(np.float32)

    def index(self, arr):
        return ''.join([chr(self.chars[i]) for i in arr])

    def __call__(self, x):
        return np.array([self.vector(c) for c in x])

    def __len__(self):
        return len(self.chars)
class LSTM:
    def __init__(self, n, n_hidden):
        # https://en.wikipedia.org/wiki/Long_short-term_memory
        self.x = tf.placeholder(tf.float32, [1, n], name='x')
        self.h = tf.placeholder(tf.float32, [1, n_hidden], name='h')
        self.c = tf.placeholder(tf.float32, [1, n_hidden], name='c')
        # Gate parameters: w* act on the input, u* on the previous hidden state.
        self.wf = tf.Variable(tf.random_normal([n, n_hidden], stddev=1.0/n))
        self.uf = tf.Variable(tf.random_normal([n_hidden, n_hidden], stddev=1.0/n))
        self.bf = tf.Variable(tf.constant(0.0, shape=[n_hidden]))
        self.wi = tf.Variable(tf.random_normal([n, n_hidden], stddev=1.0/n))
        self.ui = tf.Variable(tf.random_normal([n_hidden, n_hidden], stddev=1.0/n))
        self.bi = tf.Variable(tf.constant(0.0, shape=[n_hidden]))
        self.wo = tf.Variable(tf.random_normal([n, n_hidden], stddev=1.0/n))
        self.uo = tf.Variable(tf.random_normal([n_hidden, n_hidden], stddev=1.0/n))
        self.bo = tf.Variable(tf.constant(0.0, shape=[n_hidden]))
        self.wc = tf.Variable(tf.random_normal([n, n_hidden], stddev=1.0/n))
        self.uc = tf.Variable(tf.random_normal([n_hidden, n_hidden], stddev=1.0/n))
        self.bc = tf.Variable(tf.constant(0.0, shape=[n_hidden]))

    def theta(self):
        return [self.wf, self.uf, self.bf,
                self.wi, self.ui, self.bi,
                self.wo, self.uo, self.bo,
                self.wc, self.uc, self.bc]

    def __call__(self, x, h, c):
        f = tf.sigmoid(tf.matmul(x, self.wf) + tf.matmul(h, self.uf) + self.bf)  # forget gate
        i = tf.sigmoid(tf.matmul(x, self.wi) + tf.matmul(h, self.ui) + self.bi)  # input gate
        o = tf.sigmoid(tf.matmul(x, self.wo) + tf.matmul(h, self.uo) + self.bo)  # output gate
        g = tf.tanh(tf.matmul(x, self.wc) + tf.matmul(h, self.uc) + self.bc)     # candidate cell state
        c_ = f * c + i * g
        h_ = o * tf.tanh(c_)
        return h_, c_
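# __call__ above is the standard LSTM step from the Wikipedia article
# linked in __init__:
#   f_t = sigmoid(x_t W_f + h_{t-1} U_f + b_f)    forget gate
#   i_t = sigmoid(x_t W_i + h_{t-1} U_i + b_i)    input gate
#   o_t = sigmoid(x_t W_o + h_{t-1} U_o + b_o)    output gate
#   g_t = tanh(x_t W_c + h_{t-1} U_c + b_c)       candidate cell state
#   c_t = f_t * c_{t-1} + i_t * g_t
#   h_t = o_t * tanh(c_t)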
def safe_log(v):
    # Clip before taking the log so that a value of exactly 0 or 1 cannot
    # produce -inf/NaN in the cross-entropy below.
    return tf.log(tf.clip_by_value(v, 1e-10, 1.0))
def source_code():
    # http://www.gutenberg.org/cache/epub/100/pg100.txt
    # Return a roughly 100 KB slice of the Project Gutenberg text.
    with open('shakespeare.txt', 'r') as f:
        return f.read()[10462:113402]
if __name__ == '__main__':
    txt = source_code()
    print(txt[:200])
    print('...')
    print(txt[-200:])
    char_vec = CharVec(txt)
    n = len(char_vec)  # size of the character set
    n_iterations = 100000
    m = 50             # characters per training window
    n_hidden = 100
    alpha = 1.0        # learning rate
    lstm = LSTM(n, n_hidden)
    # Output layer mapping the hidden state to per-character probabilities.
    wy = tf.Variable(tf.random_normal([n_hidden, n], stddev=1.0/n))
    by = tf.Variable(tf.constant(0.0, shape=[n]))
    x = tf.placeholder(tf.float32, [m, n], name='x')
    y = tf.placeholder(tf.float32, [m, n], name='y')
    out = lambda h: tf.sigmoid(tf.matmul(h, wy) + by)
    theta = lstm.theta() + [wy, by]
    # Unroll the LSTM over m characters and accumulate the cross-entropy
    # between predicted and actual next characters.
    cost = 0
    h_, c_ = lstm.h, lstm.c
    for i in range(m):
        h_, c_ = lstm(x[i:i+1], h_, c_)
        h = out(h_)
        cost += -tf.reduce_sum(y[i:i+1] * safe_log(h) + (1 - y[i:i+1]) * safe_log(1 - h)) / m
    dtheta = tf.gradients(cost, theta)
    step = [tf.assign(value, tf.subtract(value, tf.multiply(alpha, dvalue)))
            for value, dvalue in zip(theta, dtheta)]
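    # The list above implements vanilla gradient descent,
    # theta <- theta - alpha * dcost/dtheta. A built-in equivalent
    # (a sketch with the same learning rate, not used here) would be:
    #   step = tf.train.GradientDescentOptimizer(alpha).minimize(cost, var_list=theta)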
    saver = tf.train.Saver()
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        j_train = 0.0
        progress = tqdm(range(n_iterations))
        p = 0  # position in the text
        for i in progress:
            if p + m >= len(txt):
                p = 0
            if p == 0:
                # Reset the recurrent state at the start of each pass.
                h = np.zeros((1, n_hidden))
                c = np.zeros((1, n_hidden))
            train = {x: char_vec(txt[p:p+m]), lstm.c: c, lstm.h: h,
                     y: char_vec(txt[p+1:p+1+m])}
            # Exponential moving average of the cost for the progress bar.
            j_train = 0.999 * j_train + 0.001 * session.run(cost, feed_dict=train)
            if i % 10 == 0:
                progress.set_description('%8.6f' % j_train)
            session.run(step, feed_dict=train)
            # Carry the final recurrent state over to the next window.
            h, c = session.run([h_, c_], feed_dict=train)
            p += m
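        # Note: gradients only flow through each m-character window (truncated
        # backpropagation through time); h and c carry state across windows,
        # but they are fed back in as constants.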
        # Expose single-step ops in named collections so that the sampling
        # script can retrieve them after tf.train.import_meta_graph.
        hnext, cnext = lstm(lstm.x, lstm.h, lstm.c)
        tf.add_to_collection('hnext', hnext)
        tf.add_to_collection('cnext', cnext)
        tf.add_to_collection('prob', out(hnext))
        saver.save(session, './lstm')
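The collections saved at the end are the contract between the two scripts: the sampler calls tf.train.import_meta_graph('lstm.meta') to rebuild this graph, then looks up the single-step ops with tf.get_collection('prob'), tf.get_collection('hnext') and tf.get_collection('cnext'), so it never has to re-declare the model. Since the sampler only reads the './lstm' checkpoint, training must have saved it before the sampler can run.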