jangirrishabh · June 14, 2018 11:42
diff --git a/toyCarIRL4.py b/toyCarIRL4.py
    def policyListUpdater(self, W, i):
        """Record a new policy's feature expectations and return its distance.

        Calls ``self.getRLAgentFE(W, i)`` to obtain the feature expectations
        of the policy associated with the weights ``W``, then computes
        ``t = |W . (expert - new)|``, where ``expert`` is
        ``self.expertPolicy`` and ``new`` is the freshly obtained feature
        expectations.

        The feature expectations are stored in ``self.policiesFE`` under the
        key ``t``, so a later policy with an identical distance replaces the
        earlier entry.

        Args:
            W: Weights; used as the left operand of ``np.dot``.
            i: Forwarded unchanged to ``self.getRLAgentFE``.

        Returns:
            The absolute value of the dot product described above.
        """
        agentFE = self.getRLAgentFE(W, i)
        # Difference between expert and new-policy feature expectations.
        gap = np.asarray(self.expertPolicy) - np.asarray(agentFE)
        t = np.abs(np.dot(W, gap))
        # The distance itself is the key under which the policy is filed.
        self.policiesFE[t] = agentFE
        return t
	def policyListUpdater(self, W, i):
		"""Add a new policy's feature expectations to the policy list.

		Obtains the feature expectations of a new policy for the weights
		``W`` via ``self.getRLAgentFE(W, i)``, computes the distance
		``t = |W . (expert - newPolicy)|`` against ``self.expertPolicy``,
		and stores the feature expectations in ``self.policiesFE`` keyed by
		that distance.

		Args:
			W: Weights; used as the left operand of ``np.dot``.
			i: Forwarded unchanged to ``self.getRLAgentFE``.

		Returns:
			The distance ``t`` (absolute value of the dot product).
		"""
		tempFE = self.getRLAgentFE(W, i)
		# hyperDistance = t = |transpose(weights) * (expert - newPolicy)|
		hyperDistance = np.abs(np.dot(W, np.asarray(self.expertPolicy) - np.asarray(tempFE)))
		# An equal distance from a later policy overwrites the earlier entry.
		self.policiesFE[hyperDistance] = tempFE
		return hyperDistance
No results found