@nagataka
Created September 5, 2019 02:12
RL book: Grid World example (Figure 4.1)
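This script is a quick take on the 4x4 gridworld policy-evaluation example (Figure 4.1 in Sutton & Barto). The inner loop approximates the iterative policy evaluation update, eq. (4.5):

v_{k+1}(s) = \sum_a \pi(a \mid s) \sum_{s', r} p(s', r \mid s, a)\,\left[ r + \gamma v_k(s') \right]

Here \pi is the equiprobable random policy, so \pi(a \mid s) = 1/4, which is the 0.25 factor in the code. The gridworld is deterministic, so the inner sum over (s', r) reduces to the single next state and reward obtained by stepping a copy of the environment.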
import gym
import sys
sys.path.append("reinforcement-learning/lib/envs")
import gridworld
import random
import numpy as np
import copy
NUM_EPOCHS = 10000
GAMMA = 1.0
env = gridworld.GridworldEnv()
# https://github.com/openai/gym/blob/master/gym/envs/toy_text/discrete.py#L16
# https://github.com/openai/gym/blob/master/gym/spaces/discrete.py
num_A = env.action_space.n
print(num_A)
num_state = env.observation_space.n
print(num_state)
V = np.zeros(num_state)
print(V)
for epoch in range(NUM_EPOCHS):
    state = env.reset()
    #env._render()
    done = False
    steps = 0
    while not done:
        # Compute eq. (4.5): expected update over all actions under the
        # equiprobable random policy, pi(a|s) = 0.25 for each of the 4 actions
        E = 0
        for a in range(num_A):
            # Probe action a from the current state on a copy of the env,
            # so the real environment is not advanced
            sim_env = copy.copy(env)
            next_state, r, _, _ = sim_env.step(a)
            E += 0.25 * (r + GAMMA * V[next_state])
        V[state] = E  # Update v(s)
        # Pick a random action and move the real environment forward
        action = random.randint(0, num_A - 1)
        next_state, r, done, _ = env.step(action)
        #env._render()
        state = next_state
        steps += 1
        #if (steps % 10) == 0:
        #    print(V)
print("*** V after {} epochs ***".format(NUM_EPOCHS))
print(V.reshape([4,4]))
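For comparison, below is a minimal in-place full-sweep version of the same evaluation, sweeping every state until the values stop changing instead of following random trajectories. This is a sketch that reuses the env and np defined above and assumes env.P follows the gym DiscreteEnv convention P[s][a] = [(prob, next_state, reward, done)] used in the discrete.py file linked earlier.

# In-place full-sweep policy evaluation (a sketch; assumes env.P follows
# the DiscreteEnv convention P[s][a] = [(prob, next_state, reward, done)])
def policy_evaluation_sweep(env, gamma=1.0, theta=1e-6):
    num_actions = env.action_space.n
    V = np.zeros(env.observation_space.n)
    while True:
        delta = 0.0
        for s in range(env.observation_space.n):
            v = 0.0
            for a in range(num_actions):
                for prob, next_s, reward, _ in env.P[s][a]:
                    # pi(a|s) = 1/num_actions for the equiprobable random policy
                    v += (1.0 / num_actions) * prob * (reward + gamma * V[next_s])
            delta = max(delta, abs(v - V[s]))
            V[s] = v
        if delta < theta:
            break
    return V

print("*** V from a full synchronous sweep ***")
print(policy_evaluation_sweep(env).reshape([4, 4]))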