AshNguyen · April 20, 2020 10:32
diff --git a/erl_avr.py b/erl_avr.py
 def average_prob(p1, p2, p3):
    '''
    Sample an action index from the average of three probability vectors.

    The three vectors are averaged element-wise and a single index is drawn
    with np.random.choice using that average as the distribution. The number
    of actions is taken from the length of the averaged vector, so the
    function does not depend on a module-level n_action.

    Args:
        p1, p2, p3: equal-length 1-D probability vectors (NumPy arrays,
            lists or tuples), each summing to 1.

    Returns:
        The sampled action index, between 0 and len(p1) - 1.

    Raises:
        ValueError: raised by np.random.choice when the averaged vector is
            empty or is not a valid probability distribution.
    '''
    # np.asarray keeps the caller's dtype and lets plain lists be added
    # element-wise instead of being concatenated by "+".
    v1, v2, v3 = (np.asarray(vec) for vec in (p1, p2, p3))
    p = (v1 + v2 + v3) / 3
    # np.random.choice requires len(a) == len(p), so the action set is fully
    # determined by the probability vector itself.
    return np.random.choice(a=range(len(p)), p=p)
 def boltzmann_prob(p1, p2, p3, T=0.5):
    '''
    Sample an action index from the average of three Boltzmann-rescaled
    vectors.

    Each input vector x is mapped to exp(x / T) / sum(exp(x / T)). The three
    rescaled vectors are averaged element-wise and a single index is drawn
    with np.random.choice using that average as the distribution. The number
    of actions is taken from the length of the averaged vector, so the
    function does not depend on a module-level n_action.

    Args:
        p1, p2, p3: equal-length 1-D probability vectors (NumPy arrays,
            lists or tuples).
        T: temperature controlling the spread of the rescaled vectors;
            smaller positive values put more mass on the largest entries.
            Must be non-zero.

    Returns:
        The sampled action index, between 0 and len(p1) - 1.

    Raises:
        ValueError: if the inputs are empty, or raised by np.random.choice
            when the averaged vector is not a valid distribution
            (for example when T is 0).
    '''
    def _boltzmann(prob):
        scaled = np.asarray(prob) / T
        # Subtracting the maximum leaves the normalized result unchanged but
        # keeps exp() from overflowing to inf (and the ratio from becoming
        # nan) when T is small.
        weights = np.exp(scaled - scaled.max())
        return weights / weights.sum()

    p = (_boltzmann(p1) + _boltzmann(p2) + _boltzmann(p3)) / 3
    # np.random.choice requires len(a) == len(p), so the action set is fully
    # determined by the probability vector itself.
    return np.random.choice(a=range(len(p)), p=p)
	def average_prob(p1, p2, p3):
		'''
		Sample an action index from the average of three probability vectors.

		The three vectors are averaged element-wise and a single index is
		drawn with np.random.choice using that average as the distribution.
		The number of actions is taken from the length of the averaged
		vector, so the function does not depend on a module-level n_action.

		Args:
			p1, p2, p3: equal-length 1-D probability vectors (NumPy arrays,
				lists or tuples), each summing to 1.

		Returns:
			The sampled action index, between 0 and len(p1) - 1.

		Raises:
			ValueError: raised by np.random.choice when the averaged vector
				is empty or is not a valid probability distribution.
		'''
		# np.asarray keeps the caller's dtype and lets plain lists be added
		# element-wise instead of being concatenated by "+".
		v1, v2, v3 = (np.asarray(vec) for vec in (p1, p2, p3))
		p = (v1 + v2 + v3) / 3
		# np.random.choice requires len(a) == len(p), so the action set is
		# fully determined by the probability vector itself.
		return np.random.choice(a=range(len(p)), p=p)
	def boltzmann_prob(p1, p2, p3, T=0.5):
		'''
		Sample an action index from the average of three Boltzmann-rescaled
		vectors.

		Each input vector x is mapped to exp(x / T) / sum(exp(x / T)). The
		three rescaled vectors are averaged element-wise and a single index
		is drawn with np.random.choice using that average as the
		distribution. The number of actions is taken from the length of the
		averaged vector, so the function does not depend on a module-level
		n_action.

		Args:
			p1, p2, p3: equal-length 1-D probability vectors (NumPy arrays,
				lists or tuples).
			T: temperature controlling the spread of the rescaled vectors;
				smaller positive values put more mass on the largest
				entries. Must be non-zero.

		Returns:
			The sampled action index, between 0 and len(p1) - 1.

		Raises:
			ValueError: if the inputs are empty, or raised by
				np.random.choice when the averaged vector is not a valid
				distribution (for example when T is 0).
		'''
		def _boltzmann(prob):
			scaled = np.asarray(prob) / T
			# Subtracting the maximum leaves the normalized result unchanged
			# but keeps exp() from overflowing to inf (and the ratio from
			# becoming nan) when T is small.
			weights = np.exp(scaled - scaled.max())
			return weights / weights.sum()

		p = (_boltzmann(p1) + _boltzmann(p2) + _boltzmann(p3)) / 3
		# np.random.choice requires len(a) == len(p), so the action set is
		# fully determined by the probability vector itself.
		return np.random.choice(a=range(len(p)), p=p)