Created
April 26, 2022 12:41
-
-
Save SamVanhoutte/f6d3b7e322aee324800e4906b443f90a to your computer and use it in GitHub Desktop.
Reinforcement learning cliff-v0 states
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function that takes the action and outputs state + reward.
def make_step(step, episode_reward, cliff):
    """Apply one action to the environment and return the updated reward total.

    Args:
        step: Action value, passed unchanged to ``cliff.step``.
        episode_reward: Reward accumulated before this step.
        cliff: Environment object exposing ``step(action)`` and ``render()``.

    Returns:
        ``episode_reward`` plus the reward obtained from this step.
    """
    # Only the first two fields of the step result are used here, so the
    # remaining ones are star-unpacked and discarded. This accepts the
    # 4-field result the original code expected as well as longer results.
    new_state, reward, *_ = cliff.step(step)
    # The message reports the total as it was *before* adding this reward.
    print(f'New state {new_state} with extra award {reward} to total {episode_reward}')
    cliff.render()
    return episode_reward + reward
# Walk the entire grid.
env = gym.make('cliff-v0')
state = env.reset()
total_reward = 0

# The walk as (action, number of repetitions) pairs, executed in order.
# Direction labels are taken from the original per-move comments.
route = [
    (2, 3),   # 3 times up
    (1, 11),  # all the way to the right
    (3, 1),   # one down
    (0, 11),  # to the left
    (3, 1),   # one down
    (1, 11),  # to the right
    (3, 1),   # the last step down
]

for action, repeats in route:
    for _ in range(repeats):
        # make_step prints and renders each move and returns the new total.
        total_reward = make_step(action, total_reward, env)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
New state 4 with extra award -1 to total 0
New state 8 with extra award -1 to total -1
New state 12 with extra award -1 to total -2
New state 13 with extra award -1 to total -3
New state 14 with extra award -1 to total -4
New state 15 with extra award -1 to total -5
New state 16 with extra award -1 to total -6
New state 17 with extra award -1 to total -7
New state 18 with extra award -1 to total -8
New state 19 with extra award -1 to total -9
New state 20 with extra award -1 to total -10
New state 21 with extra award -1 to total -11
New state 22 with extra award -1 to total -12
New state 23 with extra award -1 to total -13
New state 19 with extra award -1 to total -14
New state 18 with extra award -1 to total -15
New state 17 with extra award -1 to total -16
New state 16 with extra award -1 to total -17
New state 15 with extra award -1 to total -18
New state 14 with extra award -1 to total -19
New state 13 with extra award -1 to total -20
New state 12 with extra award -1 to total -21
New state 11 with extra award -1 to total -22
New state 10 with extra award -1 to total -23
New state 9 with extra award -1 to total -24
New state 8 with extra award -1 to total -25
New state 4 with extra award -1 to total -26
New state 5 with extra award -1 to total -27
New state 6 with extra award -1 to total -28
New state 7 with extra award -1 to total -29
New state 8 with extra award -1 to total -30
New state 9 with extra award -1 to total -31
New state 10 with extra award -1 to total -32
New state 11 with extra award -1 to total -33
New state 12 with extra award -1 to total -34
New state 13 with extra award -1 to total -35
New state 14 with extra award -1 to total -36
New state 15 with extra award -1 to total -37
New state 11 with extra award 0 to total -38
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment