benoitdescamps · June 11, 2018 16:42
diff --git a/iot_blog_cost b/iot_blog_cost
 def get_cost(target, Q, action_indices):
     """
     Mean-squared-error cost between the registered rewards and the Q-values
     of the actions that were actually taken.

     For every sample ``i`` the entry ``Q[i, action_indices[i]]`` is picked out
     of ``Q`` and compared against ``target[i]``.

     NOTE(review): assumes ``Q`` is 2-D (sample, action) and ``action_indices``
     is a 1-D int32 tensor, since it is stacked with the int32 output of
     ``tf.range`` -- confirm against the caller.

     :param tf.placeholder target: placeholder for the values of the registered rewards
     :param tf.placeholder Q: output of the Q-matrix for the registered states
     :param tf.placeholder action_indices: placeholder for the indices of the registered actions
     :return: tf.tensor, mean squared error of the rewards vs. the selected Q-values
     """
     # Number of samples is only known at run time, hence tf.shape.
     n_samples = tf.shape(action_indices)[0]
     sample_ids = tf.range(n_samples)

     # Pair each sample position with its action -> one (row, column) per sample.
     row_col_pairs = tf.stack([sample_ids, action_indices], axis=1)
     chosen_q = tf.gather_nd(Q, row_col_pairs)

     return tf.losses.mean_squared_error(labels=target, predictions=chosen_q)
	def get_cost(target, Q, action_indices):
		"""
		Cost-function of the Q-matrix attempting to approximate the reward-function.

		Builds one (row, action) index pair per registered sample, gathers the
		matching entries of ``Q`` and returns their mean squared error against
		``target``.

		NOTE(review): assumes ``Q`` is 2-D (sample, action) and ``action_indices``
		is a 1-D int32 tensor, since it is stacked with the int32 output of
		``tf.range`` -- confirm against the caller.

		:param tf.placeholder target: placeholder for the values of the registered rewards
		:param tf.placeholder Q: output of the Q-matrix for the registered states
		:param tf.placeholder action_indices: placeholder for the indices of the registered actions
		:return: tf.tensor, mean squared error of the rewards vs. the selected Q-values
		"""
		# Row positions 0..N-1, where N is the run-time length of action_indices.
		row_indices = tf.range(tf.shape(action_indices)[0])
		# Shape (N, 2): each row is [sample position, action taken].
		full_indices = tf.stack([row_indices, action_indices], axis=1)
		# Pick Q[sample, action] for every registered sample.
		q_values = tf.gather_nd(Q, full_indices)
		return tf.losses.mean_squared_error(labels=target, predictions=q_values)