'''
# sequence to sequence model with TensorFlow
#
# Environment
# 1. HW: Intel i7-4770 RAM 32G, NVIDIA GeForce GTX 960 RAM 4G
# 2. SW: Windows 10 x64, Python 3.5.2, TensorFlow-gpu 1.0.1
#
# Reference
# 1. TensorFlow tutorials : https://www.tensorflow.org/tutorials/seq2seq
# 2. RNN Encoder–Decoder : https://arxiv.org/pdf/1406.1078.pdf
# 3. Seq2Seq Learning : https://arxiv.org/pdf/1409.3215.pdf
# 4. Practical seq2seq : http://suriyadeepan.github.io/2016-12-31-practical-seq2seq
# 5. RNN in TensorFlow : http://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html
# 6. Stanford CS20si : http://web.stanford.edu/class/cs20si/lectures/slides_11.pdf
# 7. Stanford CS231n : http://cs231n.github.io/
# 8. codes : https://github.com/hunkim/DeepLearningZeroToAll
#          : https://github.com/farizrahman4u/seq2seq
#          : https://github.com/ematvey/tensorflow-seq2seq-tutorials
#          : https://github.com/tensorflow/models/tree/master/tutorials/rnn
#          : https://github.com/nicolas-ivanov/tf_seq2seq_chatbot
#          : https://github.com/sherjilozair/char-rnn-tensorflow
#
# TensorFlow, Sequence to Sequence, RNN, Encoder, Decoder, seq2seq, s2s
'''
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(777)  # reproducibility
np.random.seed(77)       # reproducibility
# dictionary: ids 0 and 1 are reserved for padding and EOS,
# so the characters 'a'..'j' are mapped to 2..11
alpha = "abcdefghij"
char_set = sorted(list(set(alpha)))
char_dic = {w: i + 2 for i, w in enumerate(char_set)}

# parameters
eos = 1
num_class = len(char_dic) + 2  # characters plus PAD(0) and EOS(1)
input_embedded_size = 32
hidden_size = 32
dtype = tf.float32
learning_rate = 0.001
epochs = 3001
test_epoch = 1
batch_size = 16

# global parameter on feed_gen
batch_cnt = 0

# cumulative loss
cum_loss = []
# create data: one pre-built batch of random sequences per training/test step
data = []
for _ in range(epochs + test_epoch):
    section = []
    for b in range(batch_size):
        # random sequence of length 4..8 over the character ids 2..num_class-1
        w = np.random.randint(2, num_class, np.random.randint(4, 9)).tolist()
        section.append(w)
    data.append(section)
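# e.g. data[0] is one batch: a list of batch_size integer sequences such as [5, 9, 3, 7]
# (the actual values and lengths depend on the numpy seed)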
# to transform a batch of variable-length sequences into a zero-padded, time-major array
def batch_transform(batch_data):
    seq_len = [len(seq) for seq in batch_data]
    b_size = len(batch_data)
    max_seq = max(seq_len)
    inp_batch = np.zeros(shape=[b_size, max_seq], dtype=np.int32)
    for i, seq in enumerate(batch_data):
        for j, val in enumerate(seq):
            inp_batch[i, j] = val
    # swap to [max_time, batch] because the RNNs below run with time_major=True
    inp_batch_pivot = inp_batch.swapaxes(0, 1)
    return inp_batch_pivot
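# quick sanity check: sequences are padded with 0 and returned time-major,
# so [[3, 4, 5], [6, 7]] becomes [[3, 6], [4, 7], [5, 0]]
assert batch_transform([[3, 4, 5], [6, 7]]).tolist() == [[3, 6], [4, 7], [5, 0]]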
# to generate a feed_dict for each batch
def feed_gen():
    global batch_cnt
    batch_data = data[batch_cnt]
    batch_cnt += 1
    enc_input = batch_transform(batch_data)
    # EOS doubles as the GO symbol: targets end with EOS, decoder inputs start with it
    dec_target = batch_transform([seq + [eos] for seq in batch_data])
    dec_input = batch_transform([[eos] + seq for seq in batch_data])
    return {encoder_input: enc_input, decoder_input: dec_input, decoder_target: dec_target}
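# e.g. for a single sequence [3, 4, 5] (before the time-major transform):
#   encoder_input  : [3, 4, 5]
#   decoder_input  : [1, 3, 4, 5]   # EOS(1) prepended as the GO symbol (teacher forcing)
#   decoder_target : [3, 4, 5, 1]   # same sequence shifted, ending with EOS(1)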
# to trim padding and end-of-sentence symbols and decode ids back to characters
def to_alpha(num2char):
    def trim_zero_eos(trim_data):
        # drop trailing PAD(0)/EOS(1) symbols, keeping everything up to the last real character
        data_len = len(trim_data)
        list.reverse(trim_data)
        for idx, num in enumerate(trim_data):
            if num > 1:
                end_num = data_len - idx
                break
        else:
            end_num = data_len
        revised = []
        list.reverse(trim_data)
        for j in range(end_num):
            revised.append(trim_data[j])
        return revised
    rev_data = trim_zero_eos(num2char)
    rev_len = len(rev_data)
    decoded = str()
    for i in range(rev_len):
        # inverse lookup in char_dic: id -> character
        decoded += list(char_dic.keys())[list(char_dic.values()).index(rev_data[i])]
    return decoded
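# quick sanity check: trailing EOS/PAD symbols are trimmed before decoding
# ('a' -> 2, 'b' -> 3, 'c' -> 4)
assert to_alpha([2, 3, 4, 1, 0, 0]) == 'abc'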
encoder_input = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_input')
decoder_target = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_target')
decoder_input = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_input')
embedded = tf.Variable(tf.random_uniform([num_class, input_embedded_size], -1., 1.), dtype=dtype)
encoder_input_embedded = tf.nn.embedding_lookup(embedded, encoder_input)
decoder_input_embedded = tf.nn.embedding_lookup(embedded, decoder_input)
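# the placeholders hold time-major [max_time, batch] id matrices; the shared embedding
# turns them into [max_time, batch, input_embedded_size] float tensors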
# encoder part
encoder_cell = tf.contrib.rnn.LSTMCell(hidden_size)
encoder_output, encoder_state = tf.nn.dynamic_rnn(encoder_cell, encoder_input_embedded, dtype=dtype, time_major=True)
# decoder part
decoder_cell = tf.contrib.rnn.LSTMCell(hidden_size)
decoder_output, decoder_state = tf.nn.dynamic_rnn(decoder_cell, decoder_input_embedded, initial_state=encoder_state,
                                                  dtype=dtype, time_major=True, scope="decoder_session")
decoder_logit = tf.contrib.layers.linear(decoder_output, num_class)
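# the encoder's final LSTMStateTuple(c, h), each [batch, hidden_size], seeds the decoder
# via initial_state; the linear layer projects decoder outputs to per-timestep logits
# of shape [max_time, batch, num_class]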
# loss
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(decoder_target, depth=num_class, dtype=dtype),
    logits=decoder_logit)
loss = tf.reduce_mean(cross_entropy)
# optimizer
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)
prediction = tf.argmax(decoder_logit, axis=2)
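# argmax over the class axis gives predicted symbol ids in the same time-major
# [max_time, batch] layout as the targets; padded positions are trained toward PAD(0)
# because the loss averages over every timestep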
# session start
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# training
for epoch in range(epochs):
    feed = feed_gen()
    t, l = sess.run([train, loss], feed_dict=feed)
    cum_loss.append(l)
    if epoch % 100 == 0:
        print('number of batch:', epoch, '\t,', 'loss:', l)

# predicting
for epoch in range(test_epoch):
    feed = feed_gen()
    results = sess.run(prediction, feed_dict=feed)
    # feed[encoder_input] and results are time-major, so transpose back to [batch, max_time]
    # before decoding each row with to_alpha
    for p, (test, result) in enumerate(zip(feed[encoder_input].T, results.T)):
        print(p, ':', to_alpha(test.tolist()), '\t->\t', to_alpha(result.tolist()))
# plotting of cumulative loss
plt.plot(cum_loss)
plt.xlabel("Epochs")
plt.ylabel("Loss")
fname = "plot_loss_adam_1e-3_epoch" + str(epochs) + ".png"
plt.savefig(fname)
plt.show()
'''
number of batch: 0 , loss: 2.01557
number of batch: 100 , loss: 1.09725
number of batch: 200 , loss: 0.962277
number of batch: 300 , loss: 0.573369
number of batch: 400 , loss: 0.700255
number of batch: 500 , loss: 0.589177
number of batch: 2600 , loss: 0.67308
number of batch: 2700 , loss: 0.721901
number of batch: 2800 , loss: 0.606156
number of batch: 2900 , loss: 0.646664
number of batch: 3000 , loss: 0.639117
0 : hcheffc -> hchfff
1 : agii -> agcc
2 : gfcgg -> ggggg
3 : degf -> degf
4 : hjfbebff -> hcbbcfff
5 : hfcg -> ffcg
6 : ebcje -> eccee
7 : bgah -> bgah
8 : cgeeadaa -> eeeaaaaa
9 : fhgh -> fhgh
10 : hdbfbaah -> hdbaaaa
11 : fjdadf -> fddaff
12 : adjiah -> acccah
13 : abhgdjig -> ahhgccgg
14 : cabb -> bbbb
15 : jhhhc -> hhhhc
'''