#!/usr/bin/env python3
# python 3.5 (anaconda)
# TF 0.12 RC from https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.12.0rc0-cp35-cp35m-linux_x86_64.whl
# $ python -c "import tensorflow; print(tensorflow.__version__)"
# 0.12.0-rc0
# OS: RedHat 7.2
# CPU version only, no GPU, no CUDA, no CUDNN

import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import GRUCell

BATCH = 5             # batch size
MAX_LEN = 10          # max length of the sequence
MLP_HIDDEN_DIM = 128  # number of hidden neurons in the MLP
EMBEDDING_DIM = 300   # embedding dimension
VOCAB_SIZE = 8        # vocabulary size
THREADS = 4           # number of threads to be used
STD = 0.001           # standard deviation of variable initializers


class SimpleSentiment:
    def __init__(self, adversarial=False, device='/cpu:0'):
        self.embeddings = tf.get_variable('word_embeddings',
                                          initializer=tf.random_uniform([VOCAB_SIZE, EMBEDDING_DIM], -1.0, 1.0))

        with tf.variable_scope('sentiment') as scope:
            with tf.device(device):
                # Inputs
                self.text = tf.placeholder(tf.int32, [BATCH, MAX_LEN])
                self.text_len = tf.placeholder(tf.int32, [BATCH])
                self.sentiment = tf.placeholder(tf.int32, [BATCH])

                # Normal loss
                loss_normal = self._loss(self.text, self.text_len, self.sentiment)

                # Define the optimizer
                # Note: I've tried multiple optimizers and none helped
                optimizer = tf.train.AdamOptimizer(learning_rate=0.0005)
                if adversarial:  # Define the adversarial loss
                    # Let's access the already defined variables
                    scope.reuse_variables()

                    # Gradients of all variables (according to the normal loss)
                    gradients = optimizer.compute_gradients(loss_normal)
                    print(len(gradients), gradients)

                    # gradient of the embeddings only
                    emb_gradient = optimizer.compute_gradients(loss_normal, [self.embeddings])[0][0]

                    # this is how much we want to shift the embeddings, i.e. going "against"
                    # the descent direction so that the loss increases
                    delta = 0.001 * tf.sign(emb_gradient)
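                    # (Illustrative aside with hypothetical numbers: for a gradient row
                    #  [0.3, -0.7, 0.0], tf.sign yields [1., -1., 0.] and delta becomes
                    #  [0.001, -0.001, 0.], so every embedding coordinate takes a fixed
                    #  small step in the direction that increases the loss.)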
                    # let's compute the loss once again, but this time we add the delta to the embeddings
                    loss_adversarial = self._loss(self.text, self.text_len, self.sentiment, delta)

                    # new gradients of the whole computational graph
                    adversarial_gradients = optimizer.compute_gradients(loss_adversarial)
                    print(len(adversarial_gradients), adversarial_gradients)  # everything is None!

                    # Now we compute an average of the old and new gradients
                    new_gradients = [((g + ag) / 2, vg) for ((g, vg), (ag, avg)) in zip(gradients, adversarial_gradients)]

                    # and apply them
                    self.training = optimizer.apply_gradients(new_gradients)

                    # Btw this doesn't work either
                    # self.training = optimizer.apply_gradients(adversarial_gradients)

                    self.loss_final = (loss_normal + loss_adversarial) / 2
                else:  # Normal loss
                    # simply minimize according to the gradients
                    self.loss_final = loss_normal
                    self.training = optimizer.minimize(loss_normal)

        # Create the session
        self.session = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=THREADS,
                                                        intra_op_parallelism_threads=THREADS,
                                                        allow_soft_placement=True))

        # init everything (still the deprecated way)
        self.session.run(tf.initialize_all_variables())

    def _loss(self, text, text_len, sentiment, emb_delta=0.0):
        # use the embeddings
        # note that emb_delta is zero as long as adversarial=False;
        # if adversarial=True, each embedding is shifted by the appropriate emb_delta
        text = tf.nn.embedding_lookup(self.embeddings + emb_delta, text)
        text = tf.split(1, text.get_shape()[1], text)
        text = [t[:, 0, :] for t in text]

        # run the GRU
        gru_cell = GRUCell(MLP_HIDDEN_DIM)
        outputs, state = tf.nn.rnn(cell=gru_cell,
                                   inputs=text,
                                   sequence_length=text_len,
                                   dtype=tf.float32)

        # define the MLP
        W1 = tf.get_variable(name='MLP_W1',
                             shape=[state.get_shape()[1], MLP_HIDDEN_DIM],
                             initializer=tf.random_normal_initializer(mean=0, stddev=STD))
        W2 = tf.get_variable(name='MLP_W2',
                             shape=[MLP_HIDDEN_DIM, 2],
                             initializer=tf.random_normal_initializer(mean=0, stddev=STD))
        h1 = tf.get_variable(name='MLP_h1',
                             shape=[MLP_HIDDEN_DIM],
                             initializer=tf.random_normal_initializer(mean=0, stddev=STD))
        h2 = tf.get_variable(name='MLP_h2',
                             shape=[2],
                             initializer=tf.random_normal_initializer(mean=0, stddev=STD))

        # apply the MLP to the last GRU state
        after_first_layer = tf.nn.relu(tf.matmul(state, W1) + h1)
        logits = tf.matmul(after_first_layer, W2) + h2

        # compute the loss via categorical cross entropy
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, sentiment))
        return loss
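

# Sketch only (the helper name below is illustrative and is not used by the model
# above): it mirrors the two print() calls in __init__ and reports which variables
# receive a None gradient for a given loss, which makes the "everything is None"
# symptom easier to read than the raw compute_gradients() output.
def print_none_gradients(optimizer, loss):
    for grad, var in optimizer.compute_gradients(loss):
        print('{}: {}'.format(var.name, 'None' if grad is None else 'gradient OK'))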


def main():
    net = SimpleSentiment(adversarial=True)

    for epoch in range(5):
        print('Epoch {}'.format(epoch))
        for batch in range(3):
            _, loss_final = net.session.run([net.training, net.loss_final],
                                            {net.text: np.array([[3, 1, 1, 2, 1, 0, 0, 0, 0, 0],
                                                                 [3, 4, 1, 2, 1, 4, 4, 0, 0, 0],
                                                                 [1, 1, 1, 2, 0, 0, 0, 0, 0, 0],
                                                                 [3, 3, 3, 2, 1, 7, 0, 0, 0, 0],
                                                                 [7, 1, 5, 2, 4, 2, 2, 2, 1, 7]], dtype='int32'),
                                             net.text_len: np.array([5, 7, 4, 6, 10], dtype='int32'),
                                             net.sentiment: np.array([0, 0, 1, 1, 0], dtype='int32')})
            print('\tBatch {}: {}'.format(batch, loss_final))


if __name__ == '__main__':
    main()