marekgalovic · November 18, 2019 23:46
diff --git a/conv_dnli.py b/conv_dnli.py
 import tensorflow as tf
 import numpy as np

 class ConvolutionalAttentionNLI(object):
    """Attention-based sentence-pair classifier with a shared convolutional encoder.

    Both input sequences are embedded with a frozen embedding matrix, passed
    through two weight-shared 1-D convolutions, softly aligned against each
    other, compared position by position, summed over positions and
    classified by a small feed-forward network.

    The graph is built once in the constructor. Callers feed ``X1``, ``X2``,
    ``X1_len``, ``X2_len``, ``targets``, ``is_training`` and ``dropout`` and
    run ``optimizer``, ``loss``, ``accuracy``, ``y`` or ``metrics``. The
    embedding variable starts as zeros; run ``embeddings_init_op`` with
    ``embeddings_placeholder`` fed to load real vectors.
    """

    # Finite stand-in for -inf that is added to padded attention scores.
    # Unlike a true -inf it keeps the softmax finite (uniform) even when a
    # sequence has length 0, instead of producing NaN.
    _MASKED_SCORE = -1e9

    def __init__(self, embeddings_shape, target_classes=2, conv_filter_size=3, conv_projection_size=300, attention_output_size=200, comparison_output_size=100, learning_rate=0.05):
        """Store the hyper-parameters and build the TensorFlow graph.

        Args:
            embeddings_shape: Shape of the embedding matrix variable that is
                indexed by the token ids in ``X1`` / ``X2``.
            target_classes: Number of output classes.
            conv_filter_size: Kernel width of both convolutions.
            conv_projection_size: Number of filters of both convolutions.
            attention_output_size: Units of the attention projection layer.
            comparison_output_size: Units of the comparison layer.
            learning_rate: Learning rate handed to the Adagrad optimizer.
        """
        self._embeddings_shape = embeddings_shape
        self._target_classes = target_classes
        self._conv_filter_size = conv_filter_size
        self._conv_projection_size = conv_projection_size
        self._attention_output_size = attention_output_size
        self._comparison_output_size = comparison_output_size
        self._learning_rate = learning_rate

        self._build_graph()

    def _build_graph(self):
        """Create a private ``tf.Graph`` and add all layers, summaries and the saver to it."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Order matters: every stage reads attributes set by an earlier one.
            self._init_embeddings()
            self._init_placeholders()
            self._embeddings_lookup()
            self._convolutional_layer()
            self._attention_layer()
            self._comparison_layer()
            self._aggregation_layer()
            self._classification_layer()
            self._init_optimizer()

            self.metrics = tf.summary.merge_all()
            # max_to_keep=None: the saver never deletes older checkpoints.
            self.saver = tf.train.Saver(max_to_keep=None)

    def _init_embeddings(self):
        """Create the non-trainable embedding variable and an op that overwrites it from a placeholder."""
        self._embeddings = tf.Variable(tf.zeros(self._embeddings_shape), name='word_embeddings', trainable=False)
        self.embeddings_placeholder = tf.placeholder(tf.float32, self._embeddings_shape)
        self.embeddings_init_op = self._embeddings.assign(self.embeddings_placeholder)

    def _init_placeholders(self):
        """Create the feed placeholders for token ids, lengths, targets and dropout control."""
        # Token-id matrices of the two sequences (both dimensions dynamic).
        self.X1 = tf.placeholder(tf.int32, [None, None])
        self.X2 = tf.placeholder(tf.int32, [None, None])

        # Number of real (non-padding) positions per example.
        self.X1_len = tf.placeholder(tf.int32, [None])
        self.X2_len = tf.placeholder(tf.int32, [None])

        # Integer class ids; the one-hot form is what the loss consumes.
        self.targets = tf.placeholder(tf.int64, [None])
        self._targets_onehot = tf.one_hot(self.targets, self._target_classes)

        self.is_training = tf.placeholder(tf.bool)
        self.dropout = tf.placeholder(tf.float32)

    def _embeddings_lookup(self):
        """Replace the token ids of both sequences by their embedding vectors."""
        with tf.name_scope('embeddings_lookup'):
            self._X1_embedded = tf.nn.embedding_lookup(self._embeddings, self.X1)
            self._X2_embedded = tf.nn.embedding_lookup(self._embeddings, self.X2)

    @staticmethod
    def _same_padding(filter_size):
        """Return ``(left, right)`` zero padding that makes a stride-1 'valid' convolution length-preserving.

        ``filter_size`` may be an int or a one-element tuple/list. For even
        kernels the extra step goes to the right.
        """
        if isinstance(filter_size, (tuple, list)):
            filter_size = filter_size[0]
        total = max(int(filter_size) - 1, 0)
        left = total // 2
        return left, total - left

    def _conv_pad(self, values):
        """Zero-pad ``values`` along axis 1 so the following 'valid' convolution keeps its length.

        ``tf.pad`` takes batch size and channel count from ``values`` itself,
        so the same helper works for the embeddings and for the output of the
        first convolution even when their widths differ. With the default
        kernel width of 3 this is one zero step on each side.
        """
        with tf.name_scope('convolutional_padding'):
            left, right = self._same_padding(self._conv_filter_size)

            return tf.pad(values, [[0, 0], [left, right], [0, 0]])

    def _convolutional_layer(self):
        """Encode both sequences with two weight-shared, bias-free convolutions, each followed by dropout."""
        def shared_conv(inputs, name, reuse=None):
            # Same layer name + reuse=True makes the second call share weights.
            return tf.layers.conv1d(
                self._conv_pad(inputs),
                self._conv_projection_size,
                self._conv_filter_size,
                padding='valid',
                use_bias=False,
                name=name,
                reuse=reuse
            )

        def drop(inputs):
            return tf.layers.dropout(inputs, rate=self.dropout, training=self.is_training)

        with tf.name_scope('convolutional_layer'):
            X1_conv_1 = shared_conv(self._X1_embedded, 'conv_1')
            X2_conv_1 = shared_conv(self._X2_embedded, 'conv_1', reuse=True)

            X1_conv_1 = drop(X1_conv_1)
            X2_conv_1 = drop(X2_conv_1)

            X1_conv_2 = shared_conv(X1_conv_1, 'conv_2')
            X2_conv_2 = shared_conv(X2_conv_1, 'conv_2', reuse=True)

            self._X1_conv = drop(X1_conv_2)
            self._X2_conv = drop(X2_conv_2)

    def _attention_layer(self):
        """Softly align the two encoded sequences against each other.

        ``_beta`` holds, for every X1 position, a weighted sum of the X2
        encodings; ``_alpha`` holds the reverse. Weights are a softmax over
        the dot products of the shared ``attention_nn`` projections, with
        padded positions of the attended sequence masked out.
        """
        with tf.name_scope('attention_layer'):
            e_X1 = tf.layers.dense(self._X1_conv, self._attention_output_size, activation=tf.nn.relu, name='attention_nn')
            e_X2 = tf.layers.dense(self._X2_conv, self._attention_output_size, activation=tf.nn.relu, name='attention_nn', reuse=True)

            # Pairwise scores: axis 1 indexes X1 positions, axis 2 X2 positions.
            e = tf.matmul(e_X1, e_X2, transpose_b=True, name='e')

            self._beta = tf.matmul(self._masked_softmax(e, self.X2_len), self._X2_conv, name='beta')
            self._alpha = tf.matmul(self._masked_softmax(tf.transpose(e, [0,2,1]), self.X1_len), self._X1_conv, name='alpha')

    def _masked_softmax(self, values, lengths):
        """Softmax over the last axis of ``values`` that ignores positions at or beyond ``lengths``.

        The mask width is taken from ``values`` rather than from the longest
        length in the batch, so batches padded wider than their longest
        sequence still line up.
        """
        with tf.name_scope('MaskedSoftmax'):
            mask = tf.expand_dims(tf.sequence_mask(lengths, tf.shape(values)[-1], dtype=tf.float32), -2)

            # Real positions keep their score; padded ones get a huge negative
            # score whose exponential underflows to exactly zero.
            masked_scores = tf.multiply(values, mask) + (1.0 - mask) * self._MASKED_SCORE

            return tf.nn.softmax(masked_scores)

    def _compare(self, inputs, attended, lengths, reuse=None):
        """Run the shared ``comparison_nn`` on ``[inputs, attended]`` and zero the padded positions."""
        compared = tf.layers.dense(
            tf.concat([inputs, attended], 2),
            self._comparison_output_size,
            activation=tf.nn.relu,
            name='comparison_nn',
            reuse=reuse
        )
        compared = tf.layers.dropout(compared, rate=self.dropout, training=self.is_training)
        # Mask as wide as the padded sequence, 1.0 on real positions.
        keep = tf.sequence_mask(lengths, tf.shape(inputs)[1], dtype=tf.float32)

        return tf.multiply(compared, tf.expand_dims(keep, -1))

    def _comparison_layer(self):
        """Compare every position with its aligned counterpart from the other sequence."""
        with tf.name_scope('comparison_layer'):
            self._X1_comp = self._compare(self._X1_conv, self._beta, self.X1_len)
            self._X2_comp = self._compare(self._X2_conv, self._alpha, self.X2_len, reuse=True)

    def _aggregation_layer(self):
        """Sum the comparison vectors over positions and concatenate both sequence summaries."""
        with tf.name_scope('aggregation_layer'):
            X1_agg = tf.reduce_sum(self._X1_comp, 1)
            X2_agg = tf.reduce_sum(self._X2_comp, 1)

            self._agg = tf.concat([X1_agg, X2_agg], 1)

    def _classification_layer(self):
        """Map the aggregated pair representation to class scores and probabilities (``y``)."""
        with tf.name_scope('classifier'):
            hidden = tf.layers.dropout(
                tf.layers.dense(self._agg, 100, activation=tf.nn.relu, name='L1'),
                rate=self.dropout, training=self.is_training
            )

            # Keep the raw scores: the loss applies its own softmax, so it must
            # not be fed probabilities. The layer keeps the name 'y' so its
            # variables are named as before.
            self._logits = tf.layers.dense(hidden, self._target_classes, activation=None, name='y')
            self.y = tf.nn.softmax(self._logits)

            tf.summary.histogram('y', self.y)

    def _init_optimizer(self):
        """Define loss, accuracy, the Adagrad training op (``optimizer``) and scalar summaries."""
        # Cross-entropy on the unnormalised scores; softmax is applied inside.
        self.loss = tf.losses.softmax_cross_entropy(self._targets_onehot, self._logits)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.targets, tf.argmax(self.y, 1)), tf.float32))

        # Note: despite its name this attribute is the op returned by minimize().
        self.optimizer = tf.train.AdagradOptimizer(learning_rate=self._learning_rate).minimize(self.loss)

        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
diff --git a/dnli.py b/dnli.py
 import tensorflow as tf
 import numpy as np

 class DecomposableNLI(object):
    """Attention-based sentence-pair classifier working directly on word embeddings.

    Both input sequences are embedded with a frozen embedding matrix, softly
    aligned against each other, compared position by position, summed over
    positions and classified by a small feed-forward network.

    The graph is built once in the constructor. Callers feed ``X1``, ``X2``,
    ``X1_len``, ``X2_len``, ``targets``, ``is_training`` and ``dropout`` and
    run ``optimizer``, ``loss``, ``accuracy``, ``y`` or ``metrics``. The
    embedding variable starts as zeros; run ``embeddings_init_op`` with
    ``embeddings_placeholder`` fed to load real vectors.
    """

    # Finite stand-in for -inf that is added to padded attention scores.
    # Unlike a true -inf it keeps the softmax finite (uniform) even when a
    # sequence has length 0, instead of producing NaN.
    _MASKED_SCORE = -1e9

    def __init__(self, embeddings_shape, target_classes=2, attention_output_size=200, comparison_output_size=100, learning_rate=0.0001):
        """Store the hyper-parameters and build the TensorFlow graph.

        Args:
            embeddings_shape: Shape of the embedding matrix variable that is
                indexed by the token ids in ``X1`` / ``X2``.
            target_classes: Number of output classes.
            attention_output_size: Units of the attention projection layer.
            comparison_output_size: Units of the comparison layer.
            learning_rate: Learning rate handed to the Adam optimizer.
        """
        self._embeddings_shape = embeddings_shape
        self._target_classes = target_classes
        self._attention_output_size = attention_output_size
        self._comparison_output_size = comparison_output_size
        self._learning_rate = learning_rate

        self._build_graph()

    def _build_graph(self):
        """Create a private ``tf.Graph`` and add all layers, summaries and the saver to it."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Order matters: every stage reads attributes set by an earlier one.
            self._init_embeddings()
            self._init_placeholders()
            self._embeddings_lookup()
            self._attention_layer()
            self._comparison_layer()
            self._aggregation_layer()
            self._classification_layer()
            self._init_optimizer()

            self.metrics = tf.summary.merge_all()
            # max_to_keep=None: the saver never deletes older checkpoints.
            self.saver = tf.train.Saver(max_to_keep=None)

    def _init_embeddings(self):
        """Create the non-trainable embedding variable and an op that overwrites it from a placeholder."""
        self._embeddings = tf.Variable(tf.zeros(self._embeddings_shape), name='word_embeddings', trainable=False)
        self.embeddings_placeholder = tf.placeholder(tf.float32, self._embeddings_shape)
        self.embeddings_init_op = self._embeddings.assign(self.embeddings_placeholder)

    def _init_placeholders(self):
        """Create the feed placeholders for token ids, lengths, targets and dropout control."""
        # Token-id matrices of the two sequences (both dimensions dynamic).
        self.X1 = tf.placeholder(tf.int32, [None, None])
        self.X2 = tf.placeholder(tf.int32, [None, None])

        # Number of real (non-padding) positions per example.
        self.X1_len = tf.placeholder(tf.int32, [None])
        self.X2_len = tf.placeholder(tf.int32, [None])

        # Integer class ids; the one-hot form is what the loss consumes.
        self.targets = tf.placeholder(tf.int64, [None])
        self._targets_onehot = tf.one_hot(self.targets, self._target_classes)

        self.is_training = tf.placeholder(tf.bool)
        self.dropout = tf.placeholder(tf.float32)

    def _embeddings_lookup(self):
        """Replace the token ids of both sequences by their embedding vectors."""
        with tf.name_scope('embeddings_lookup'):
            self._X1_embedded = tf.nn.embedding_lookup(self._embeddings, self.X1)
            self._X2_embedded = tf.nn.embedding_lookup(self._embeddings, self.X2)

    def _attention_layer(self):
        """Softly align the two embedded sequences against each other.

        ``_beta`` holds, for every X1 position, a weighted sum of the X2
        embeddings; ``_alpha`` holds the reverse. Weights are a softmax over
        the dot products of the shared ``attention_nn`` projections, with
        padded positions of the attended sequence masked out.
        """
        with tf.name_scope('attention_layer'):
            e_X1 = tf.layers.dense(self._X1_embedded, self._attention_output_size, activation=tf.nn.relu, name='attention_nn')
            e_X2 = tf.layers.dense(self._X2_embedded, self._attention_output_size, activation=tf.nn.relu, name='attention_nn', reuse=True)

            # Pairwise scores: axis 1 indexes X1 positions, axis 2 X2 positions.
            e = tf.matmul(e_X1, e_X2, transpose_b=True, name='e')

            self._beta = tf.matmul(self._masked_softmax(e, self.X2_len), self._X2_embedded, name='beta')
            self._alpha = tf.matmul(self._masked_softmax(tf.transpose(e, [0,2,1]), self.X1_len), self._X1_embedded, name='alpha')

    def _masked_softmax(self, values, lengths):
        """Softmax over the last axis of ``values`` that ignores positions at or beyond ``lengths``.

        The mask width is taken from ``values`` rather than from the longest
        length in the batch, so batches padded wider than their longest
        sequence still line up.
        """
        with tf.name_scope('MaskedSoftmax'):
            mask = tf.expand_dims(tf.sequence_mask(lengths, tf.shape(values)[-1], dtype=tf.float32), -2)

            # Real positions keep their score; padded ones get a huge negative
            # score whose exponential underflows to exactly zero.
            masked_scores = tf.multiply(values, mask) + (1.0 - mask) * self._MASKED_SCORE

            return tf.nn.softmax(masked_scores)

    def _compare(self, inputs, attended, lengths, reuse=None):
        """Run the shared ``comparison_nn`` on ``[inputs, attended]`` and zero the padded positions."""
        compared = tf.layers.dense(
            tf.concat([inputs, attended], 2),
            self._comparison_output_size,
            activation=tf.nn.relu,
            name='comparison_nn',
            reuse=reuse
        )
        compared = tf.layers.dropout(compared, rate=self.dropout, training=self.is_training)
        # Mask as wide as the padded sequence, 1.0 on real positions.
        keep = tf.sequence_mask(lengths, tf.shape(inputs)[1], dtype=tf.float32)

        return tf.multiply(compared, tf.expand_dims(keep, -1))

    def _comparison_layer(self):
        """Compare every position with its aligned counterpart from the other sequence."""
        with tf.name_scope('comparison_layer'):
            self._X1_comp = self._compare(self._X1_embedded, self._beta, self.X1_len)
            self._X2_comp = self._compare(self._X2_embedded, self._alpha, self.X2_len, reuse=True)

    def _aggregation_layer(self):
        """Sum the comparison vectors over positions and concatenate both sequence summaries."""
        with tf.name_scope('aggregation_layer'):
            X1_agg = tf.reduce_sum(self._X1_comp, 1)
            X2_agg = tf.reduce_sum(self._X2_comp, 1)

            self._agg = tf.concat([X1_agg, X2_agg], 1)

    def _classification_layer(self):
        """Map the aggregated pair representation to class scores and probabilities (``y``)."""
        with tf.name_scope('classifier'):
            hidden = tf.layers.dropout(
                tf.layers.dense(self._agg, 100, activation=tf.nn.relu, name='L1'),
                rate=self.dropout, training=self.is_training
            )

            # Keep the raw scores: the loss applies its own softmax, so it must
            # not be fed probabilities. The layer keeps the name 'y' so its
            # variables are named as before.
            self._logits = tf.layers.dense(hidden, self._target_classes, activation=None, name='y')
            self.y = tf.nn.softmax(self._logits)

            tf.summary.histogram('y', self.y)

    def _init_optimizer(self):
        """Define loss, accuracy, the Adam training op (``optimizer``) and scalar summaries."""
        # Cross-entropy on the unnormalised scores; softmax is applied inside.
        self.loss = tf.losses.softmax_cross_entropy(self._targets_onehot, self._logits)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.targets, tf.argmax(self.y, 1)), tf.float32))

        # Note: despite its name this attribute is the op returned by minimize().
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate).minimize(self.loss)

        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
	import tensorflow as tf
	import numpy as np

	class ConvolutionalAttentionNLI(object):
	    """Attention-based sentence-pair classifier with a shared convolutional encoder.

	    Both input sequences are embedded with a frozen embedding matrix, passed
	    through two weight-shared 1-D convolutions, softly aligned against each
	    other, compared position by position, summed over positions and
	    classified by a small feed-forward network.

	    The graph is built once in the constructor. Callers feed ``X1``, ``X2``,
	    ``X1_len``, ``X2_len``, ``targets``, ``is_training`` and ``dropout`` and
	    run ``optimizer``, ``loss``, ``accuracy``, ``y`` or ``metrics``. The
	    embedding variable starts as zeros; run ``embeddings_init_op`` with
	    ``embeddings_placeholder`` fed to load real vectors.
	    """

	    # Finite stand-in for -inf that is added to padded attention scores.
	    # Unlike a true -inf it keeps the softmax finite (uniform) even when a
	    # sequence has length 0, instead of producing NaN.
	    _MASKED_SCORE = -1e9

	    def __init__(self, embeddings_shape, target_classes=2, conv_filter_size=3, conv_projection_size=300, attention_output_size=200, comparison_output_size=100, learning_rate=0.05):
	        """Store the hyper-parameters and build the TensorFlow graph.

	        Args:
	            embeddings_shape: Shape of the embedding matrix variable that is
	                indexed by the token ids in ``X1`` / ``X2``.
	            target_classes: Number of output classes.
	            conv_filter_size: Kernel width of both convolutions.
	            conv_projection_size: Number of filters of both convolutions.
	            attention_output_size: Units of the attention projection layer.
	            comparison_output_size: Units of the comparison layer.
	            learning_rate: Learning rate handed to the Adagrad optimizer.
	        """
	        self._embeddings_shape = embeddings_shape
	        self._target_classes = target_classes
	        self._conv_filter_size = conv_filter_size
	        self._conv_projection_size = conv_projection_size
	        self._attention_output_size = attention_output_size
	        self._comparison_output_size = comparison_output_size
	        self._learning_rate = learning_rate

	        self._build_graph()

	    def _build_graph(self):
	        """Create a private ``tf.Graph`` and add all layers, summaries and the saver to it."""
	        self.graph = tf.Graph()
	        with self.graph.as_default():
	            # Order matters: every stage reads attributes set by an earlier one.
	            self._init_embeddings()
	            self._init_placeholders()
	            self._embeddings_lookup()
	            self._convolutional_layer()
	            self._attention_layer()
	            self._comparison_layer()
	            self._aggregation_layer()
	            self._classification_layer()
	            self._init_optimizer()

	            self.metrics = tf.summary.merge_all()
	            # max_to_keep=None: the saver never deletes older checkpoints.
	            self.saver = tf.train.Saver(max_to_keep=None)

	    def _init_embeddings(self):
	        """Create the non-trainable embedding variable and an op that overwrites it from a placeholder."""
	        self._embeddings = tf.Variable(tf.zeros(self._embeddings_shape), name='word_embeddings', trainable=False)
	        self.embeddings_placeholder = tf.placeholder(tf.float32, self._embeddings_shape)
	        self.embeddings_init_op = self._embeddings.assign(self.embeddings_placeholder)

	    def _init_placeholders(self):
	        """Create the feed placeholders for token ids, lengths, targets and dropout control."""
	        # Token-id matrices of the two sequences (both dimensions dynamic).
	        self.X1 = tf.placeholder(tf.int32, [None, None])
	        self.X2 = tf.placeholder(tf.int32, [None, None])

	        # Number of real (non-padding) positions per example.
	        self.X1_len = tf.placeholder(tf.int32, [None])
	        self.X2_len = tf.placeholder(tf.int32, [None])

	        # Integer class ids; the one-hot form is what the loss consumes.
	        self.targets = tf.placeholder(tf.int64, [None])
	        self._targets_onehot = tf.one_hot(self.targets, self._target_classes)

	        self.is_training = tf.placeholder(tf.bool)
	        self.dropout = tf.placeholder(tf.float32)

	    def _embeddings_lookup(self):
	        """Replace the token ids of both sequences by their embedding vectors."""
	        with tf.name_scope('embeddings_lookup'):
	            self._X1_embedded = tf.nn.embedding_lookup(self._embeddings, self.X1)
	            self._X2_embedded = tf.nn.embedding_lookup(self._embeddings, self.X2)

	    @staticmethod
	    def _same_padding(filter_size):
	        """Return ``(left, right)`` zero padding that makes a stride-1 'valid' convolution length-preserving.

	        ``filter_size`` may be an int or a one-element tuple/list. For even
	        kernels the extra step goes to the right.
	        """
	        if isinstance(filter_size, (tuple, list)):
	            filter_size = filter_size[0]
	        total = max(int(filter_size) - 1, 0)
	        left = total // 2
	        return left, total - left

	    def _conv_pad(self, values):
	        """Zero-pad ``values`` along axis 1 so the following 'valid' convolution keeps its length.

	        ``tf.pad`` takes batch size and channel count from ``values`` itself,
	        so the same helper works for the embeddings and for the output of the
	        first convolution even when their widths differ. With the default
	        kernel width of 3 this is one zero step on each side.
	        """
	        with tf.name_scope('convolutional_padding'):
	            left, right = self._same_padding(self._conv_filter_size)

	            return tf.pad(values, [[0, 0], [left, right], [0, 0]])

	    def _convolutional_layer(self):
	        """Encode both sequences with two weight-shared, bias-free convolutions, each followed by dropout."""
	        def shared_conv(inputs, name, reuse=None):
	            # Same layer name + reuse=True makes the second call share weights.
	            return tf.layers.conv1d(
	                self._conv_pad(inputs),
	                self._conv_projection_size,
	                self._conv_filter_size,
	                padding='valid',
	                use_bias=False,
	                name=name,
	                reuse=reuse
	            )

	        def drop(inputs):
	            return tf.layers.dropout(inputs, rate=self.dropout, training=self.is_training)

	        with tf.name_scope('convolutional_layer'):
	            X1_conv_1 = shared_conv(self._X1_embedded, 'conv_1')
	            X2_conv_1 = shared_conv(self._X2_embedded, 'conv_1', reuse=True)

	            X1_conv_1 = drop(X1_conv_1)
	            X2_conv_1 = drop(X2_conv_1)

	            X1_conv_2 = shared_conv(X1_conv_1, 'conv_2')
	            X2_conv_2 = shared_conv(X2_conv_1, 'conv_2', reuse=True)

	            self._X1_conv = drop(X1_conv_2)
	            self._X2_conv = drop(X2_conv_2)

	    def _attention_layer(self):
	        """Softly align the two encoded sequences against each other.

	        ``_beta`` holds, for every X1 position, a weighted sum of the X2
	        encodings; ``_alpha`` holds the reverse. Weights are a softmax over
	        the dot products of the shared ``attention_nn`` projections, with
	        padded positions of the attended sequence masked out.
	        """
	        with tf.name_scope('attention_layer'):
	            e_X1 = tf.layers.dense(self._X1_conv, self._attention_output_size, activation=tf.nn.relu, name='attention_nn')
	            e_X2 = tf.layers.dense(self._X2_conv, self._attention_output_size, activation=tf.nn.relu, name='attention_nn', reuse=True)

	            # Pairwise scores: axis 1 indexes X1 positions, axis 2 X2 positions.
	            e = tf.matmul(e_X1, e_X2, transpose_b=True, name='e')

	            self._beta = tf.matmul(self._masked_softmax(e, self.X2_len), self._X2_conv, name='beta')
	            self._alpha = tf.matmul(self._masked_softmax(tf.transpose(e, [0,2,1]), self.X1_len), self._X1_conv, name='alpha')

	    def _masked_softmax(self, values, lengths):
	        """Softmax over the last axis of ``values`` that ignores positions at or beyond ``lengths``.

	        The mask width is taken from ``values`` rather than from the longest
	        length in the batch, so batches padded wider than their longest
	        sequence still line up.
	        """
	        with tf.name_scope('MaskedSoftmax'):
	            mask = tf.expand_dims(tf.sequence_mask(lengths, tf.shape(values)[-1], dtype=tf.float32), -2)

	            # Real positions keep their score; padded ones get a huge negative
	            # score whose exponential underflows to exactly zero.
	            masked_scores = tf.multiply(values, mask) + (1.0 - mask) * self._MASKED_SCORE

	            return tf.nn.softmax(masked_scores)

	    def _compare(self, inputs, attended, lengths, reuse=None):
	        """Run the shared ``comparison_nn`` on ``[inputs, attended]`` and zero the padded positions."""
	        compared = tf.layers.dense(
	            tf.concat([inputs, attended], 2),
	            self._comparison_output_size,
	            activation=tf.nn.relu,
	            name='comparison_nn',
	            reuse=reuse
	        )
	        compared = tf.layers.dropout(compared, rate=self.dropout, training=self.is_training)
	        # Mask as wide as the padded sequence, 1.0 on real positions.
	        keep = tf.sequence_mask(lengths, tf.shape(inputs)[1], dtype=tf.float32)

	        return tf.multiply(compared, tf.expand_dims(keep, -1))

	    def _comparison_layer(self):
	        """Compare every position with its aligned counterpart from the other sequence."""
	        with tf.name_scope('comparison_layer'):
	            self._X1_comp = self._compare(self._X1_conv, self._beta, self.X1_len)
	            self._X2_comp = self._compare(self._X2_conv, self._alpha, self.X2_len, reuse=True)

	    def _aggregation_layer(self):
	        """Sum the comparison vectors over positions and concatenate both sequence summaries."""
	        with tf.name_scope('aggregation_layer'):
	            X1_agg = tf.reduce_sum(self._X1_comp, 1)
	            X2_agg = tf.reduce_sum(self._X2_comp, 1)

	            self._agg = tf.concat([X1_agg, X2_agg], 1)

	    def _classification_layer(self):
	        """Map the aggregated pair representation to class scores and probabilities (``y``)."""
	        with tf.name_scope('classifier'):
	            hidden = tf.layers.dropout(
	                tf.layers.dense(self._agg, 100, activation=tf.nn.relu, name='L1'),
	                rate=self.dropout, training=self.is_training
	            )

	            # Keep the raw scores: the loss applies its own softmax, so it must
	            # not be fed probabilities. The layer keeps the name 'y' so its
	            # variables are named as before.
	            self._logits = tf.layers.dense(hidden, self._target_classes, activation=None, name='y')
	            self.y = tf.nn.softmax(self._logits)

	            tf.summary.histogram('y', self.y)

	    def _init_optimizer(self):
	        """Define loss, accuracy, the Adagrad training op (``optimizer``) and scalar summaries."""
	        # Cross-entropy on the unnormalised scores; softmax is applied inside.
	        self.loss = tf.losses.softmax_cross_entropy(self._targets_onehot, self._logits)
	        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.targets, tf.argmax(self.y, 1)), tf.float32))

	        # Note: despite its name this attribute is the op returned by minimize().
	        self.optimizer = tf.train.AdagradOptimizer(learning_rate=self._learning_rate).minimize(self.loss)

	        tf.summary.scalar('loss', self.loss)
	        tf.summary.scalar('accuracy', self.accuracy)
	import tensorflow as tf
	import numpy as np

	class DecomposableNLI(object):
	    """Attention-based sentence-pair classifier working directly on word embeddings.

	    Both input sequences are embedded with a frozen embedding matrix, softly
	    aligned against each other, compared position by position, summed over
	    positions and classified by a small feed-forward network.

	    The graph is built once in the constructor. Callers feed ``X1``, ``X2``,
	    ``X1_len``, ``X2_len``, ``targets``, ``is_training`` and ``dropout`` and
	    run ``optimizer``, ``loss``, ``accuracy``, ``y`` or ``metrics``. The
	    embedding variable starts as zeros; run ``embeddings_init_op`` with
	    ``embeddings_placeholder`` fed to load real vectors.
	    """

	    # Finite stand-in for -inf that is added to padded attention scores.
	    # Unlike a true -inf it keeps the softmax finite (uniform) even when a
	    # sequence has length 0, instead of producing NaN.
	    _MASKED_SCORE = -1e9

	    def __init__(self, embeddings_shape, target_classes=2, attention_output_size=200, comparison_output_size=100, learning_rate=0.0001):
	        """Store the hyper-parameters and build the TensorFlow graph.

	        Args:
	            embeddings_shape: Shape of the embedding matrix variable that is
	                indexed by the token ids in ``X1`` / ``X2``.
	            target_classes: Number of output classes.
	            attention_output_size: Units of the attention projection layer.
	            comparison_output_size: Units of the comparison layer.
	            learning_rate: Learning rate handed to the Adam optimizer.
	        """
	        self._embeddings_shape = embeddings_shape
	        self._target_classes = target_classes
	        self._attention_output_size = attention_output_size
	        self._comparison_output_size = comparison_output_size
	        self._learning_rate = learning_rate

	        self._build_graph()

	    def _build_graph(self):
	        """Create a private ``tf.Graph`` and add all layers, summaries and the saver to it."""
	        self.graph = tf.Graph()
	        with self.graph.as_default():
	            # Order matters: every stage reads attributes set by an earlier one.
	            self._init_embeddings()
	            self._init_placeholders()
	            self._embeddings_lookup()
	            self._attention_layer()
	            self._comparison_layer()
	            self._aggregation_layer()
	            self._classification_layer()
	            self._init_optimizer()

	            self.metrics = tf.summary.merge_all()
	            # max_to_keep=None: the saver never deletes older checkpoints.
	            self.saver = tf.train.Saver(max_to_keep=None)

	    def _init_embeddings(self):
	        """Create the non-trainable embedding variable and an op that overwrites it from a placeholder."""
	        self._embeddings = tf.Variable(tf.zeros(self._embeddings_shape), name='word_embeddings', trainable=False)
	        self.embeddings_placeholder = tf.placeholder(tf.float32, self._embeddings_shape)
	        self.embeddings_init_op = self._embeddings.assign(self.embeddings_placeholder)

	    def _init_placeholders(self):
	        """Create the feed placeholders for token ids, lengths, targets and dropout control."""
	        # Token-id matrices of the two sequences (both dimensions dynamic).
	        self.X1 = tf.placeholder(tf.int32, [None, None])
	        self.X2 = tf.placeholder(tf.int32, [None, None])

	        # Number of real (non-padding) positions per example.
	        self.X1_len = tf.placeholder(tf.int32, [None])
	        self.X2_len = tf.placeholder(tf.int32, [None])

	        # Integer class ids; the one-hot form is what the loss consumes.
	        self.targets = tf.placeholder(tf.int64, [None])
	        self._targets_onehot = tf.one_hot(self.targets, self._target_classes)

	        self.is_training = tf.placeholder(tf.bool)
	        self.dropout = tf.placeholder(tf.float32)

	    def _embeddings_lookup(self):
	        """Replace the token ids of both sequences by their embedding vectors."""
	        with tf.name_scope('embeddings_lookup'):
	            self._X1_embedded = tf.nn.embedding_lookup(self._embeddings, self.X1)
	            self._X2_embedded = tf.nn.embedding_lookup(self._embeddings, self.X2)

	    def _attention_layer(self):
	        """Softly align the two embedded sequences against each other.

	        ``_beta`` holds, for every X1 position, a weighted sum of the X2
	        embeddings; ``_alpha`` holds the reverse. Weights are a softmax over
	        the dot products of the shared ``attention_nn`` projections, with
	        padded positions of the attended sequence masked out.
	        """
	        with tf.name_scope('attention_layer'):
	            e_X1 = tf.layers.dense(self._X1_embedded, self._attention_output_size, activation=tf.nn.relu, name='attention_nn')
	            e_X2 = tf.layers.dense(self._X2_embedded, self._attention_output_size, activation=tf.nn.relu, name='attention_nn', reuse=True)

	            # Pairwise scores: axis 1 indexes X1 positions, axis 2 X2 positions.
	            e = tf.matmul(e_X1, e_X2, transpose_b=True, name='e')

	            self._beta = tf.matmul(self._masked_softmax(e, self.X2_len), self._X2_embedded, name='beta')
	            self._alpha = tf.matmul(self._masked_softmax(tf.transpose(e, [0,2,1]), self.X1_len), self._X1_embedded, name='alpha')

	    def _masked_softmax(self, values, lengths):
	        """Softmax over the last axis of ``values`` that ignores positions at or beyond ``lengths``.

	        The mask width is taken from ``values`` rather than from the longest
	        length in the batch, so batches padded wider than their longest
	        sequence still line up.
	        """
	        with tf.name_scope('MaskedSoftmax'):
	            mask = tf.expand_dims(tf.sequence_mask(lengths, tf.shape(values)[-1], dtype=tf.float32), -2)

	            # Real positions keep their score; padded ones get a huge negative
	            # score whose exponential underflows to exactly zero.
	            masked_scores = tf.multiply(values, mask) + (1.0 - mask) * self._MASKED_SCORE

	            return tf.nn.softmax(masked_scores)

	    def _compare(self, inputs, attended, lengths, reuse=None):
	        """Run the shared ``comparison_nn`` on ``[inputs, attended]`` and zero the padded positions."""
	        compared = tf.layers.dense(
	            tf.concat([inputs, attended], 2),
	            self._comparison_output_size,
	            activation=tf.nn.relu,
	            name='comparison_nn',
	            reuse=reuse
	        )
	        compared = tf.layers.dropout(compared, rate=self.dropout, training=self.is_training)
	        # Mask as wide as the padded sequence, 1.0 on real positions.
	        keep = tf.sequence_mask(lengths, tf.shape(inputs)[1], dtype=tf.float32)

	        return tf.multiply(compared, tf.expand_dims(keep, -1))

	    def _comparison_layer(self):
	        """Compare every position with its aligned counterpart from the other sequence."""
	        with tf.name_scope('comparison_layer'):
	            self._X1_comp = self._compare(self._X1_embedded, self._beta, self.X1_len)
	            self._X2_comp = self._compare(self._X2_embedded, self._alpha, self.X2_len, reuse=True)

	    def _aggregation_layer(self):
	        """Sum the comparison vectors over positions and concatenate both sequence summaries."""
	        with tf.name_scope('aggregation_layer'):
	            X1_agg = tf.reduce_sum(self._X1_comp, 1)
	            X2_agg = tf.reduce_sum(self._X2_comp, 1)

	            self._agg = tf.concat([X1_agg, X2_agg], 1)

	    def _classification_layer(self):
	        """Map the aggregated pair representation to class scores and probabilities (``y``)."""
	        with tf.name_scope('classifier'):
	            hidden = tf.layers.dropout(
	                tf.layers.dense(self._agg, 100, activation=tf.nn.relu, name='L1'),
	                rate=self.dropout, training=self.is_training
	            )

	            # Keep the raw scores: the loss applies its own softmax, so it must
	            # not be fed probabilities. The layer keeps the name 'y' so its
	            # variables are named as before.
	            self._logits = tf.layers.dense(hidden, self._target_classes, activation=None, name='y')
	            self.y = tf.nn.softmax(self._logits)

	            tf.summary.histogram('y', self.y)

	    def _init_optimizer(self):
	        """Define loss, accuracy, the Adam training op (``optimizer``) and scalar summaries."""
	        # Cross-entropy on the unnormalised scores; softmax is applied inside.
	        self.loss = tf.losses.softmax_cross_entropy(self._targets_onehot, self._logits)
	        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.targets, tf.argmax(self.y, 1)), tf.float32))

	        # Note: despite its name this attribute is the op returned by minimize().
	        self.optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate).minimize(self.loss)

	        tf.summary.scalar('loss', self.loss)
	        tf.summary.scalar('accuracy', self.accuracy)