"""
This gist is an extract from:
https://github.com/epignatelli/reinforcement-learning-an-introduction
"""
import numpy as np


def policy_evaluation(env, policy=None, steps=1, discount=1., in_place=False):
    """Iterative policy evaluation on a 2-D gridworld.

    Args:
        env: a gridworld environment exposing a 2-D `state_value` table and a
            `bellman_expectation(state, action_probs, discount)` method.
        policy (numpy.ndarray): a 3-D array whose first two dimensions identify
            a state and whose third dimension indexes the actions; each entry
            stores the probability of taking that action in that state.
        steps (int): the number of sweeps of the algorithm.
        discount (float): discount factor for the Bellman expectation equation.
        in_place (bool): if False, the value table is updated only after all
            the new values have been computed. If True, the update for state
            [i, j] already uses the new values computed for earlier states.
    """
    if policy is None:
        # default to a uniform random policy over the action set ACTIONS,
        # which the source repository defines (a plausible version is
        # sketched below)
        policy = np.ones((*env.state_value.shape, len(ACTIONS))) / len(ACTIONS)
    for _ in range(steps):
        # write into the live table if in place, otherwise into a fresh buffer
        values = env.state_value if in_place else np.empty_like(env.state_value)
        for i in range(len(env.state_value)):
            for j in range(len(env.state_value[i])):
                # apply the Bellman expectation equation to each state;
                # the discount is applied inside bellman_expectation
                state = (i, j)
                values[i, j] = env.bellman_expectation(state, policy[i, j], discount)
        # commit the new value table
        env.state_value = values
    return env.state_value
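

# The environment class itself is not part of this gist; policy_evaluation
# only relies on the interface sketched below. This is a minimal sketch: the
# 4x4 grid, the -1 per-step reward, the terminal corners, and the ACTIONS
# definition are illustrative assumptions, not the exact implementation from
# the linked repository.
ACTIONS = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # up, down, left, right (assumed)


class GridWorld:
    def __init__(self, shape=(4, 4)):
        self.state_value = np.zeros(shape)
        # assumed terminal states: two opposite corners, as in the classic
        # gridworld example from Sutton and Barto
        self.terminals = {(0, 0), (shape[0] - 1, shape[1] - 1)}

    def step(self, state, action):
        # deterministic move, clipped at the grid border; -1 reward per step,
        # no transition out of a terminal state
        if state in self.terminals:
            return state, 0.0
        i = min(max(state[0] + action[0], 0), self.state_value.shape[0] - 1)
        j = min(max(state[1] + action[1], 0), self.state_value.shape[1] - 1)
        return (i, j), -1.0

    def bellman_expectation(self, state, action_probs, discount):
        # v(s) = sum_a pi(a|s) * (r + gamma * v(s')), with deterministic
        # transitions so the inner expectation over s' collapses
        value = 0.0
        for a, action in enumerate(ACTIONS):
            next_state, reward = self.step(state, action)
            value += action_probs[a] * (reward + discount * self.state_value[next_state])
        return value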
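

# Example usage under the assumptions above: a few sweeps of iterative policy
# evaluation for the uniform random policy on the assumed 4x4 gridworld.
if __name__ == "__main__":
    env = GridWorld()
    values = policy_evaluation(env, steps=100, discount=1.0, in_place=False)
    print(values)  # approaches the classic 0 / -14 / -20 / -22 value pattern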