dcan.py
"""This file contains some basic model components""" | |
import tensorflow as tf | |
from tensorflow.python.ops.rnn_cell import DropoutWrapper | |
from tensorflow.python.ops import variable_scope as vs | |
from tensorflow.python.ops import rnn_cell | |
from operator import mul | |
class LSTMEncoder(object):

    def __init__(self, hidden_size, keep_prob):
        """
        Inputs:
          hidden_size: int. Hidden size of the RNN.
          keep_prob: Tensor containing a single scalar that is the keep probability (for dropout).
        """
        self.hidden_size = hidden_size  # e.g. 200
        self.keep_prob = keep_prob
        self.lstm = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_size, forget_bias=1.0)
        self.lstm = tf.nn.rnn_cell.DropoutWrapper(cell=self.lstm, input_keep_prob=self.keep_prob)
        # self.question_length = 30
        # self.context_length = 600

    def build_graph(self, inputs, masks, type):
        with vs.variable_scope("LSTMEncoder"):
            input_lens = tf.reduce_sum(masks, axis=1)

            # 1) Get the encoding from the LSTM.
            C_or_Q, _ = tf.nn.dynamic_rnn(self.lstm, inputs, sequence_length=input_lens, dtype=tf.float32)

            if type == "question":
                # Question encoding: apply a non-linear projection to the LSTM output,
                # q_dash = tanh(W q + b), as in the DCN formulation.
                q_dash = tf.layers.dense(C_or_Q, C_or_Q.get_shape()[2], activation=tf.tanh)
                inputs_temp = q_dash
                sentinel = tf.get_variable("sentinel_q", [1, self.hidden_size],
                                           initializer=tf.random_normal_initializer())
            else:
                # Context encoding: use the LSTM output directly.
                sentinel = tf.get_variable("sentinel_c", [1, self.hidden_size],
                                           initializer=tf.random_normal_initializer())  # (1, 200)
                inputs_temp = C_or_Q

            # Reshape the sentinel, tile it across the batch, and append it as one
            # extra timestep at the end of the sequence.
            sentinel = tf.reshape(sentinel, (1, 1, -1))                     # (1, 1, 200)
            sentinel = tf.tile(sentinel, (tf.shape(inputs_temp)[0], 1, 1))  # (?, 1, 200)
            out = tf.concat([inputs_temp, sentinel], 1)                     # (?, 601, 200)

            # Apply dropout.
            out = tf.nn.dropout(out, self.keep_prob)
            return out
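
# A minimal usage sketch (not part of the original gist). The tensor names, the
# 200-dimensional hidden size, and the 30/600 lengths are assumptions taken from
# the shape comments above:
#
#   encoder = LSTMEncoder(hidden_size=200, keep_prob=keep_prob)
#   question_hiddens = encoder.build_graph(qn_embs, qn_mask, "question")          # (batch, 30 + 1, 200)
#   context_hiddens = encoder.build_graph(context_embs, context_mask, "context")  # (batch, 600 + 1, 200)
#
# The extra timestep in each output is the appended sentinel vector.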


class CoAttention(object):

    def __init__(self, keep_prob, context_hidden_size, query_hidden_size):
        """
        Inputs:
          keep_prob: Tensor containing a single scalar that is the keep probability (for dropout).
          context_hidden_size: int. Size of the context hidden vectors.
          query_hidden_size: int. Size of the query (question) hidden vectors.
        """
        self.keep_prob = keep_prob
        self.context_hidden_size = context_hidden_size
        self.query_hidden_size = query_hidden_size

    def build_graph(self, question_hiddens, context_mask, context_hiddens):
        with vs.variable_scope('Coattention'):
            # question_hiddens: (?, 31, 200), context_hiddens: (?, 601, 200)
            Q_transpose = tf.transpose(question_hiddens, perm=[0, 2, 1])  # (?, 200, 31)

            # Affinity matrix L = D^T Q, where D = context_hiddens and Q = question_hiddens.
            L = tf.matmul(context_hiddens, Q_transpose)    # (?, 601, 31)
            L_transpose = tf.transpose(L, perm=[0, 2, 1])  # (?, 31, 601)

            # Attention distributions: softmax over the last dimension of each batch element.
            # (masked_softmax below could be used here instead, to ignore padding positions.)
            A_D = tf.map_fn(lambda x: tf.nn.softmax(x), L_transpose, dtype=tf.float32)  # (?, 31, 601)
            A_Q = tf.map_fn(lambda x: tf.nn.softmax(x), L, dtype=tf.float32)            # (?, 601, 31)

            # Attention contexts: C_Q = D^T A_Q, then C_D = [Q; C_Q] A_D.
            C_Q = tf.matmul(tf.transpose(context_hiddens, perm=[0, 2, 1]), A_Q)  # (?, 200, 31)
            Q_concat_CQ = tf.concat([Q_transpose, C_Q], axis=1)                  # (?, 400, 31)
            C_D = tf.matmul(Q_concat_CQ, A_D)                                    # (?, 400, 601)

            # Concatenate the context encoding with its coattention context.
            CO_ATT = tf.concat([context_hiddens, tf.transpose(C_D, perm=[0, 2, 1])], axis=2)  # (?, 601, 600)

            with tf.variable_scope('Coatt_encoder'):
                # Bidirectional LSTM over the coattention representation.
                cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.query_hidden_size, forget_bias=1.0)
                cell_fw = DropoutWrapper(cell_fw, input_keep_prob=self.keep_prob)
                cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.query_hidden_size, forget_bias=1.0)
                cell_bw = DropoutWrapper(cell_bw, input_keep_prob=self.keep_prob)
                input_lens = tf.reduce_sum(context_mask, axis=1)
                # +1 on the sequence length so the sentinel timestep is also processed.
                (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, CO_ATT,
                                                                      dtype=tf.float32, sequence_length=input_lens + 1)
                U_1 = tf.concat([fw_out, bw_out], axis=2)  # (?, 601, 400)
                dims = U_1.get_shape().as_list()
                # Remove the sentinel vector (the last timestep).
                U_2 = tf.slice(U_1, [0, 0, 0], [tf.shape(U_1)[0], dims[1] - 1, dims[2]])
                U_2 = tf.reshape(U_2, [tf.shape(U_1)[0], dims[1] - 1, dims[2]])

                # Apply dropout.
                out = tf.nn.dropout(U_2, self.keep_prob)  # (?, 600, 400)
            return out
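
# A minimal usage sketch (not part of the original gist); the variable names are
# assumptions and the shapes follow the inline comments above:
#
#   coatt = CoAttention(keep_prob, context_hidden_size=200, query_hidden_size=200)
#   U = coatt.build_graph(question_hiddens, context_mask, context_hiddens)  # (batch, 600, 400)
#
# U is the coattention encoding with the sentinel timestep removed; it can be fed
# to a downstream answer-span prediction layer.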


def masked_softmax(logits, mask, dim):
    """
    Takes masked softmax over given dimension of logits.

    Inputs:
      logits: Tensor. We want to take softmax over dimension dim.
      mask: Tensor of same shape as logits.
        Has 1s where there's real data in logits, 0 where there's padding.
      dim: int. Dimension over which to take softmax.

    Returns:
      masked_logits: Tensor of same shape as logits.
        Same as logits, but with a very large negative number (-1e30)
        added in the padding locations.
      prob_dist: Tensor of same shape as logits.
        The result of taking softmax over masked_logits in the given dimension.
        Should be 0 in padding locations and sum to 1 over the given dimension.
    """
    exp_mask = (1 - tf.cast(mask, 'float')) * (-1e30)  # -large where there's padding, 0 elsewhere
    masked_logits = tf.add(logits, exp_mask)  # where there's padding, set logits to -large
    prob_dist = tf.nn.softmax(masked_logits, dim)
    return masked_logits, prob_dist
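

if __name__ == "__main__":
    # A small sanity check for masked_softmax (a sketch added for illustration; the
    # literal values are made up and are not part of the original gist).
    logits = tf.constant([[1.0, 2.0, 3.0, 0.0]])
    mask = tf.constant([[1, 1, 1, 0]])  # last position is padding
    masked_logits, probs = masked_softmax(logits, mask, 1)
    with tf.Session() as sess:
        # Expected output: approximately [[0.09, 0.245, 0.665, 0.0]] -- the padded
        # position receives (almost) zero probability mass.
        print(sess.run(probs))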