praveen-palanisamy · December 12, 2015 22:49
diff --git a/gistfile1.txt b/gistfile1.txt
 % Exponential weight update after observing `reward` for the action indexed
 % by `astAction`: only that action's weight is scaled down, by
 % exp(-learningRate * loss).
 % NOTE(review): `astAction` looks like a truncated `lastAction`; it is defined
 % outside this snippet, so the name is left unchanged -- confirm.
 % NOTE(review): presumably reward lies in [0,1] so the loss does too -- confirm.
 lossScalar = 1 - reward; % This is the loss of the chosen action
 % The action count used to be read from two differently named properties
 % (self.nbActions and self.numActions). Take it once from the weight vector,
 % whose length the element-wise update below has to match anyway.
 numActions = numel(self.weights);
 lossVector = zeros(1, numActions); % every action other than the chosen one gets zero loss
 lossVector(astAction) = lossScalar;
 self.timeStep = self.timeStep + 1;
 % Step size sqrt(log(K)/t), evaluated with the already-incremented time step.
 learningRate = sqrt(log(numActions) / self.timeStep);
 % The weight update step below depends on the learning policy. This will probably be handled by the NN/RL-net
 % lossVector is a row, so the transpose yields a column.
 % Assumes self.weights is a column vector -- TODO confirm.
 self.weights = self.weights .* (exp(-learningRate * lossVector))';
	% Exponential weight update after observing `reward` for the action indexed
	% by `astAction`: only that action's weight is scaled down, by
	% exp(-learningRate * loss).
	% NOTE(review): `astAction` looks like a truncated `lastAction`; it is defined
	% outside this snippet, so the name is left unchanged -- confirm.
	% NOTE(review): presumably reward lies in [0,1] so the loss does too -- confirm.
	lossScalar = 1 - reward; % This is the loss of the chosen action
	% The action count used to be read from two differently named properties
	% (self.nbActions and self.numActions). Take it once from the weight vector,
	% whose length the element-wise update below has to match anyway.
	numActions = numel(self.weights);
	lossVector = zeros(1, numActions); % every action other than the chosen one gets zero loss
	lossVector(astAction) = lossScalar;
	self.timeStep = self.timeStep + 1;
	% Step size sqrt(log(K)/t), evaluated with the already-incremented time step.
	learningRate = sqrt(log(numActions) / self.timeStep);
	% The weight update step below depends on the learning policy. This will probably be handled by the NN/RL-net
	% The element-wise product (.*) and the scalar product (*) must be written
	% explicitly; without them this statement does not parse.
	% lossVector is a row, so the transpose yields a column.
	% Assumes self.weights is a column vector -- TODO confirm.
	self.weights = self.weights .* (exp(-learningRate * lossVector))';