Chion82 · April 2, 2020 15:25
diff --git a/discounted_rewards.py b/discounted_rewards.py
 import numpy as np
 from scipy.signal import lfilter

 def discount_readable(r, gamma):
     """Compute the gamma-discounted rewards over an episode.

     Walks the episode backwards and accumulates the running return
     ``G[t] = r[t] + gamma * G[t + 1]``, with the return past the last
     step taken as 0.

     Args:
         r: Sequence or array of per-step rewards, indexed by time step
             along its first axis.
         gamma: Discount factor applied to the running return at each step.

     Returns:
         A NumPy array shaped like ``r`` holding the discounted return at
         every step. Integer or boolean rewards yield a ``float64`` array;
         any other input dtype is kept as-is.
     """
     rewards = np.asarray(r)
     # An integer/bool output buffer would silently truncate every discounted
     # value on assignment, so promote those dtypes to float64.
     out_dtype = np.float64 if rewards.dtype.kind in "biu" else rewards.dtype
     discounted_r = np.zeros_like(rewards, dtype=out_dtype)
     cumul_r = 0
     for t in reversed(range(len(rewards))):
         cumul_r = rewards[t] + cumul_r * gamma
         discounted_r[t] = cumul_r
     return discounted_r

 def discount_wtf(x, gamma):
     """Compute gamma-discounted rewards with a recursive linear filter.

     Reverses the rewards in time, applies the first-order recurrence
     ``y[n] = x[n] + gamma * y[n - 1]`` via ``scipy.signal.lfilter``, and
     reverses the result back, so that entry ``t`` holds
     ``x[t] + gamma * x[t + 1] + gamma**2 * x[t + 2] + ...``.

     Args:
         x: Sequence or array of per-step rewards, indexed by time step
             along its first axis.
         gamma: Discount factor.

     Returns:
         A NumPy array shaped like ``x`` with the discounted return at
         every step.
     """
     reversed_x = np.asarray(x)[::-1]
     # Filter along axis 0, the same axis that was reversed; lfilter's default
     # (last axis) would filter across the wrong dimension for 2-D input.
     return lfilter([1], [1, -gamma], reversed_x, axis=0)[::-1]
	import numpy as np
	from scipy.signal import lfilter

	def discount_readable(r, gamma):
		"""Compute the gamma-discounted rewards over an episode.

		Walks the episode backwards and accumulates the running return
		``G[t] = r[t] + gamma * G[t + 1]``, with the return past the last
		step taken as 0.

		Args:
			r: Sequence or array of per-step rewards, indexed by time step
				along its first axis.
			gamma: Discount factor applied to the running return at each step.

		Returns:
			A NumPy array shaped like ``r`` holding the discounted return at
			every step. Integer or boolean rewards yield a ``float64`` array;
			any other input dtype is kept as-is.
		"""
		rewards = np.asarray(r)
		# An integer/bool output buffer would silently truncate every discounted
		# value on assignment, so promote those dtypes to float64.
		out_dtype = np.float64 if rewards.dtype.kind in "biu" else rewards.dtype
		discounted_r = np.zeros_like(rewards, dtype=out_dtype)
		cumul_r = 0
		for t in reversed(range(len(rewards))):
			cumul_r = rewards[t] + cumul_r * gamma
			discounted_r[t] = cumul_r
		return discounted_r

	def discount_wtf(x, gamma):
		"""Compute gamma-discounted rewards with a recursive linear filter.

		Reverses the rewards in time, applies the first-order recurrence
		``y[n] = x[n] + gamma * y[n - 1]`` via ``scipy.signal.lfilter``, and
		reverses the result back, so that entry ``t`` holds
		``x[t] + gamma * x[t + 1] + gamma**2 * x[t + 2] + ...``.

		Args:
			x: Sequence or array of per-step rewards, indexed by time step
				along its first axis.
			gamma: Discount factor.

		Returns:
			A NumPy array shaped like ``x`` with the discounted return at
			every step.
		"""
		reversed_x = np.asarray(x)[::-1]
		# Filter along axis 0, the same axis that was reversed; lfilter's default
		# (last axis) would filter across the wrong dimension for 2-D input.
		return lfilter([1], [1, -gamma], reversed_x, axis=0)[::-1]
No results found