Last active
November 18, 2019 23:46
-
-
Save marekgalovic/a1a4073b917ae1b18dc7413436794dca to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import numpy as np | |
class ConvolutionalAttentionNLI(object):
    """Sentence-pair classifier: shared 1-D convolutions plus soft-alignment attention.

    The model owns its own ``tf.Graph`` (``self.graph``).  Two batches of
    token ids (``X1``, ``X2``) are embedded with a frozen embedding matrix,
    passed through two weight-shared convolution layers, softly aligned
    against each other, compared token by token, summed over time and
    classified into ``target_classes`` classes.

    Public graph handles created here:

    * ``embeddings_placeholder`` / ``embeddings_init_op`` -- feed the
      pre-trained matrix and run the op once to load it.
    * ``X1``, ``X2`` -- int32 token ids, shape ``[batch, time]``.
    * ``X1_len``, ``X2_len`` -- int32 true sequence lengths, shape ``[batch]``.
    * ``targets`` -- int64 class ids, shape ``[batch]``.
    * ``is_training`` (bool) and ``dropout`` (float rate) -- dropout control.
    * ``y`` -- class probabilities, ``loss``, ``accuracy``, ``optimizer``
      (the training op), ``metrics`` (merged summaries) and ``saver``.

    Args:
        embeddings_shape: ``(vocabulary_size, embedding_size)`` of the
            embedding matrix.
        target_classes: number of output classes.
        conv_filter_size: width of both convolution kernels.
        conv_projection_size: number of filters in both convolutions.
        attention_output_size: width of the attention projection.
        comparison_output_size: width of the comparison projection.
        learning_rate: Adagrad learning rate.
    """

    # Logit assigned to padded positions before the softmax.  A large finite
    # value is used instead of -inf so that a zero-length sequence produces a
    # finite (uniform) row rather than NaN.
    _MASKED_LOGIT = -1e9

    def __init__(
        self,
        embeddings_shape,
        target_classes=2,
        conv_filter_size=3,
        conv_projection_size=300,
        attention_output_size=200,
        comparison_output_size=100,
        learning_rate=0.05,
    ):
        self._embeddings_shape = embeddings_shape
        self._target_classes = target_classes
        self._conv_filter_size = conv_filter_size
        self._conv_projection_size = conv_projection_size
        self._attention_output_size = attention_output_size
        self._comparison_output_size = comparison_output_size
        self._learning_rate = learning_rate

        self._build_graph()

    def _build_graph(self):
        """Create a private graph and build every layer inside it, in order."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            self._init_embeddings()
            self._init_placeholders()
            self._embeddings_lookup()
            self._convolutional_layer()
            self._attention_layer()
            self._comparison_layer()
            self._aggregation_layer()
            self._classification_layer()
            self._init_optimizer()

            self.metrics = tf.summary.merge_all()
            self.saver = tf.train.Saver(max_to_keep=None)

    def _init_embeddings(self):
        """Create the frozen embedding variable and the op that loads it."""
        self._embeddings = tf.Variable(
            tf.zeros(self._embeddings_shape), name='word_embeddings', trainable=False
        )
        # The matrix is fed through a placeholder and assigned, rather than
        # being baked into the graph as a constant initializer.
        self.embeddings_placeholder = tf.placeholder(tf.float32, self._embeddings_shape)
        self.embeddings_init_op = self._embeddings.assign(self.embeddings_placeholder)

    def _init_placeholders(self):
        """Declare the input, target and dropout-control placeholders."""
        self.X1 = tf.placeholder(tf.int32, [None, None])
        self.X2 = tf.placeholder(tf.int32, [None, None])
        self.X1_len = tf.placeholder(tf.int32, [None])
        self.X2_len = tf.placeholder(tf.int32, [None])

        self.targets = tf.placeholder(tf.int64, [None])
        self._targets_onehot = tf.one_hot(self.targets, self._target_classes)

        self.is_training = tf.placeholder(tf.bool)
        self.dropout = tf.placeholder(tf.float32)

    def _embeddings_lookup(self):
        """Map token ids of both inputs to their embedding vectors."""
        with tf.name_scope('embeddings_lookup'):
            self._X1_embedded = tf.nn.embedding_lookup(self._embeddings, self.X1)
            self._X2_embedded = tf.nn.embedding_lookup(self._embeddings, self.X2)

    def _conv_pad(self, values):
        """Zero-pad the time axis so a 'valid' convolution keeps its length.

        ``tf.pad`` is used instead of concatenating a hand-built zero tensor
        so the pad works for any feature depth (the second convolution sees
        ``conv_projection_size`` channels, not the embedding size) and for
        any ``conv_filter_size``.  For the default width of 3 this pads one
        step on each side.
        """
        with tf.name_scope('convolutional_padding'):
            total = self._conv_filter_size - 1
            before = total // 2
            after = total - before
            return tf.pad(values, [[0, 0], [before, after], [0, 0]])

    def _conv(self, inputs, name, reuse=None):
        """Apply one length-preserving, bias-free convolution named ``name``."""
        # NOTE(review): no activation is passed, so the convolution output is
        # linear -- kept as in the original; confirm this is intended.
        return tf.layers.conv1d(
            self._conv_pad(inputs),
            self._conv_projection_size,
            self._conv_filter_size,
            padding='valid',
            use_bias=False,
            name=name,
            reuse=reuse,
        )

    def _dropout(self, values):
        """Apply dropout driven by the ``dropout`` / ``is_training`` placeholders."""
        return tf.layers.dropout(values, rate=self.dropout, training=self.is_training)

    def _convolutional_layer(self):
        """Run both inputs through two convolutions whose weights are shared."""
        with tf.name_scope('convolutional_layer'):
            # The X2 call reuses the variables the X1 call just created.
            X1_conv_1 = self._conv(self._X1_embedded, 'conv_1')
            X2_conv_1 = self._conv(self._X2_embedded, 'conv_1', reuse=True)
            X1_conv_1 = self._dropout(X1_conv_1)
            X2_conv_1 = self._dropout(X2_conv_1)

            X1_conv_2 = self._conv(X1_conv_1, 'conv_2')
            X2_conv_2 = self._conv(X2_conv_1, 'conv_2', reuse=True)
            self._X1_conv = self._dropout(X1_conv_2)
            self._X2_conv = self._dropout(X2_conv_2)

    def _attention_layer(self):
        """Softly align each sequence against the other.

        ``_beta`` holds, for every X1 position, the attention-weighted sum of
        X2 features; ``_alpha`` is the symmetric quantity for X2 positions.
        """
        with tf.name_scope('attention_layer'):
            e_X1 = tf.layers.dense(
                self._X1_conv, self._attention_output_size,
                activation=tf.nn.relu, name='attention_nn'
            )
            e_X2 = tf.layers.dense(
                self._X2_conv, self._attention_output_size,
                activation=tf.nn.relu, name='attention_nn', reuse=True
            )
            # Pairwise scores, shape [batch, time_1, time_2].
            e = tf.matmul(e_X1, e_X2, transpose_b=True, name='e')

            self._beta = tf.matmul(
                self._masked_softmax(e, self.X2_len), self._X2_conv, name='beta'
            )
            self._alpha = tf.matmul(
                self._masked_softmax(tf.transpose(e, [0, 2, 1]), self.X1_len),
                self._X1_conv,
                name='alpha',
            )

    def _masked_softmax(self, values, lengths):
        """Softmax over the last axis that ignores positions past ``lengths``.

        Args:
            values: rank-3 score tensor; the last axis is the one normalised.
            lengths: int tensor of shape ``[batch]`` with the number of valid
                positions along the last axis of ``values``.
        """
        with tf.name_scope('MaskedSoftmax'):
            # The mask width comes from the tensor itself, not max(lengths),
            # so batches padded beyond their longest sequence still broadcast.
            mask = tf.expand_dims(
                tf.sequence_mask(lengths, tf.shape(values)[-1], dtype=tf.float32), -2
            )
            masked = tf.multiply(values, mask) + (1.0 - mask) * self._MASKED_LOGIT
            return tf.nn.softmax(masked)

    def _zero_padding(self, values, lengths):
        """Zero out the time steps of ``values`` that lie past ``lengths``."""
        mask = tf.sequence_mask(lengths, tf.shape(values)[1], dtype=tf.float32)
        return tf.multiply(values, tf.expand_dims(mask, -1))

    def _compare(self, tokens, aligned, lengths, reuse=None):
        """Project ``[tokens; aligned]`` with the shared comparison network."""
        compared = tf.layers.dense(
            tf.concat([tokens, aligned], 2),
            self._comparison_output_size,
            activation=tf.nn.relu,
            name='comparison_nn',
            reuse=reuse,
        )
        # Padded steps are zeroed so they do not contribute to the later sum.
        return self._zero_padding(self._dropout(compared), lengths)

    def _comparison_layer(self):
        """Compare every token with its soft-aligned counterpart."""
        with tf.name_scope('comparison_layer'):
            self._X1_comp = self._compare(self._X1_conv, self._beta, self.X1_len)
            self._X2_comp = self._compare(
                self._X2_conv, self._alpha, self.X2_len, reuse=True
            )

    def _aggregation_layer(self):
        """Sum the comparison vectors over time and concatenate both sides."""
        with tf.name_scope('aggregation_layer'):
            X1_agg = tf.reduce_sum(self._X1_comp, 1)
            X2_agg = tf.reduce_sum(self._X2_comp, 1)
            self._agg = tf.concat([X1_agg, X2_agg], 1)

    def _classification_layer(self):
        """Two-layer classifier; exposes probabilities as ``self.y``."""
        with tf.name_scope('classifier'):
            hidden = self._dropout(
                tf.layers.dense(self._agg, 100, activation=tf.nn.relu, name='L1')
            )
            # Raw logits are kept for the loss; the layer keeps the name 'y'
            # so its variable names are unchanged.
            self._logits = tf.layers.dense(hidden, self._target_classes, name='y')
            self.y = tf.nn.softmax(self._logits)

            tf.summary.histogram('y', self.y)

    def _init_optimizer(self):
        """Define loss, accuracy, the Adagrad training op and scalar summaries."""
        # softmax_cross_entropy applies the softmax itself, so it must be fed
        # the logits; feeding the probabilities would apply softmax twice.
        self.loss = tf.losses.softmax_cross_entropy(self._targets_onehot, self._logits)
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.targets, tf.argmax(self.y, 1)), tf.float32)
        )
        self.optimizer = tf.train.AdagradOptimizer(
            learning_rate=self._learning_rate
        ).minimize(self.loss)

        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import numpy as np | |
class DecomposableNLI(object):
    """Sentence-pair classifier built from attend / compare / aggregate steps.

    The model owns its own ``tf.Graph`` (``self.graph``).  Two batches of
    token ids (``X1``, ``X2``) are embedded with a frozen embedding matrix,
    softly aligned against each other, compared token by token, summed over
    time and classified into ``target_classes`` classes.
    NOTE(review): the structure looks like the decomposable attention model
    for natural language inference -- confirm against the author's reference.

    Public graph handles created here:

    * ``embeddings_placeholder`` / ``embeddings_init_op`` -- feed the
      pre-trained matrix and run the op once to load it.
    * ``X1``, ``X2`` -- int32 token ids, shape ``[batch, time]``.
    * ``X1_len``, ``X2_len`` -- int32 true sequence lengths, shape ``[batch]``.
    * ``targets`` -- int64 class ids, shape ``[batch]``.
    * ``is_training`` (bool) and ``dropout`` (float rate) -- dropout control.
    * ``y`` -- class probabilities, ``loss``, ``accuracy``, ``optimizer``
      (the training op), ``metrics`` (merged summaries) and ``saver``.

    Args:
        embeddings_shape: ``(vocabulary_size, embedding_size)`` of the
            embedding matrix.
        target_classes: number of output classes.
        attention_output_size: width of the attention projection.
        comparison_output_size: width of the comparison projection.
        learning_rate: Adam learning rate.
    """

    # Logit assigned to padded positions before the softmax.  A large finite
    # value is used instead of -inf so that a zero-length sequence produces a
    # finite (uniform) row rather than NaN.
    _MASKED_LOGIT = -1e9

    def __init__(
        self,
        embeddings_shape,
        target_classes=2,
        attention_output_size=200,
        comparison_output_size=100,
        learning_rate=0.0001,
    ):
        self._embeddings_shape = embeddings_shape
        self._target_classes = target_classes
        self._attention_output_size = attention_output_size
        self._comparison_output_size = comparison_output_size
        self._learning_rate = learning_rate

        self._build_graph()

    def _build_graph(self):
        """Create a private graph and build every layer inside it, in order."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            self._init_embeddings()
            self._init_placeholders()
            self._embeddings_lookup()
            self._attention_layer()
            self._comparison_layer()
            self._aggregation_layer()
            self._classification_layer()
            self._init_optimizer()

            self.metrics = tf.summary.merge_all()
            self.saver = tf.train.Saver(max_to_keep=None)

    def _init_embeddings(self):
        """Create the frozen embedding variable and the op that loads it."""
        self._embeddings = tf.Variable(
            tf.zeros(self._embeddings_shape), name='word_embeddings', trainable=False
        )
        # The matrix is fed through a placeholder and assigned, rather than
        # being baked into the graph as a constant initializer.
        self.embeddings_placeholder = tf.placeholder(tf.float32, self._embeddings_shape)
        self.embeddings_init_op = self._embeddings.assign(self.embeddings_placeholder)

    def _init_placeholders(self):
        """Declare the input, target and dropout-control placeholders."""
        self.X1 = tf.placeholder(tf.int32, [None, None])
        self.X2 = tf.placeholder(tf.int32, [None, None])
        self.X1_len = tf.placeholder(tf.int32, [None])
        self.X2_len = tf.placeholder(tf.int32, [None])

        self.targets = tf.placeholder(tf.int64, [None])
        self._targets_onehot = tf.one_hot(self.targets, self._target_classes)

        self.is_training = tf.placeholder(tf.bool)
        self.dropout = tf.placeholder(tf.float32)

    def _embeddings_lookup(self):
        """Map token ids of both inputs to their embedding vectors."""
        with tf.name_scope('embeddings_lookup'):
            self._X1_embedded = tf.nn.embedding_lookup(self._embeddings, self.X1)
            self._X2_embedded = tf.nn.embedding_lookup(self._embeddings, self.X2)

    def _dropout(self, values):
        """Apply dropout driven by the ``dropout`` / ``is_training`` placeholders."""
        return tf.layers.dropout(values, rate=self.dropout, training=self.is_training)

    def _attention_layer(self):
        """Softly align each sequence against the other.

        ``_beta`` holds, for every X1 position, the attention-weighted sum of
        X2 embeddings; ``_alpha`` is the symmetric quantity for X2 positions.
        """
        with tf.name_scope('attention_layer'):
            e_X1 = tf.layers.dense(
                self._X1_embedded, self._attention_output_size,
                activation=tf.nn.relu, name='attention_nn'
            )
            e_X2 = tf.layers.dense(
                self._X2_embedded, self._attention_output_size,
                activation=tf.nn.relu, name='attention_nn', reuse=True
            )
            # Pairwise scores, shape [batch, time_1, time_2].
            e = tf.matmul(e_X1, e_X2, transpose_b=True, name='e')

            self._beta = tf.matmul(
                self._masked_softmax(e, self.X2_len), self._X2_embedded, name='beta'
            )
            self._alpha = tf.matmul(
                self._masked_softmax(tf.transpose(e, [0, 2, 1]), self.X1_len),
                self._X1_embedded,
                name='alpha',
            )

    def _masked_softmax(self, values, lengths):
        """Softmax over the last axis that ignores positions past ``lengths``.

        Args:
            values: rank-3 score tensor; the last axis is the one normalised.
            lengths: int tensor of shape ``[batch]`` with the number of valid
                positions along the last axis of ``values``.
        """
        with tf.name_scope('MaskedSoftmax'):
            # The mask width comes from the tensor itself, not max(lengths),
            # so batches padded beyond their longest sequence still broadcast.
            mask = tf.expand_dims(
                tf.sequence_mask(lengths, tf.shape(values)[-1], dtype=tf.float32), -2
            )
            masked = tf.multiply(values, mask) + (1.0 - mask) * self._MASKED_LOGIT
            return tf.nn.softmax(masked)

    def _zero_padding(self, values, lengths):
        """Zero out the time steps of ``values`` that lie past ``lengths``."""
        mask = tf.sequence_mask(lengths, tf.shape(values)[1], dtype=tf.float32)
        return tf.multiply(values, tf.expand_dims(mask, -1))

    def _compare(self, tokens, aligned, lengths, reuse=None):
        """Project ``[tokens; aligned]`` with the shared comparison network."""
        compared = tf.layers.dense(
            tf.concat([tokens, aligned], 2),
            self._comparison_output_size,
            activation=tf.nn.relu,
            name='comparison_nn',
            reuse=reuse,
        )
        # Padded steps are zeroed so they do not contribute to the later sum.
        return self._zero_padding(self._dropout(compared), lengths)

    def _comparison_layer(self):
        """Compare every token with its soft-aligned counterpart."""
        with tf.name_scope('comparison_layer'):
            self._X1_comp = self._compare(self._X1_embedded, self._beta, self.X1_len)
            self._X2_comp = self._compare(
                self._X2_embedded, self._alpha, self.X2_len, reuse=True
            )

    def _aggregation_layer(self):
        """Sum the comparison vectors over time and concatenate both sides."""
        with tf.name_scope('aggregation_layer'):
            X1_agg = tf.reduce_sum(self._X1_comp, 1)
            X2_agg = tf.reduce_sum(self._X2_comp, 1)
            self._agg = tf.concat([X1_agg, X2_agg], 1)

    def _classification_layer(self):
        """Two-layer classifier; exposes probabilities as ``self.y``."""
        with tf.name_scope('classifier'):
            hidden = self._dropout(
                tf.layers.dense(self._agg, 100, activation=tf.nn.relu, name='L1')
            )
            # Raw logits are kept for the loss; the layer keeps the name 'y'
            # so its variable names are unchanged.
            self._logits = tf.layers.dense(hidden, self._target_classes, name='y')
            self.y = tf.nn.softmax(self._logits)

            tf.summary.histogram('y', self.y)

    def _init_optimizer(self):
        """Define loss, accuracy, the Adam training op and scalar summaries."""
        # softmax_cross_entropy applies the softmax itself, so it must be fed
        # the logits; feeding the probabilities would apply softmax twice.
        self.loss = tf.losses.softmax_cross_entropy(self._targets_onehot, self._logits)
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.targets, tf.argmax(self.y, 1)), tf.float32)
        )
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self._learning_rate
        ).minimize(self.loss)

        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment