SamVanhoutte · April 26, 2022 12:41
diff --git a/cliff-states.py b/cliff-states.py
 # Apply one action to the environment, report it, and return the new total.
 def make_step(step, episode_reward, cliff):
    """Take a single action in ``cliff`` and return the updated episode reward.

    Args:
        step: Action value handed unchanged to ``cliff.step``.
        episode_reward: Reward accumulated before this action is taken.
        cliff: Environment object; must provide ``step(action)`` returning a
            4-tuple whose first two items are the new state and the reward,
            and a ``render()`` method.

    Returns:
        ``episode_reward`` plus the reward produced by this action.

    The printed line shows the total as it was *before* this action's reward
    is added.
    """
    # The last two items of the step result are not used here.
    new_state, reward, _finished, _info = cliff.step(step)
    message = f'New state {new_state} with extra award {reward} to total {episode_reward}'
    print(message)
    cliff.render()
    updated_total = episode_reward + reward
    return updated_total
  
 # Walk the entire grid by replaying a fixed route through the environment.
 env = gym.make('cliff-v0')
 state = env.reset()
 total_reward = 0

 # Each leg is (action, number of consecutive steps), listed in walking order.
 route = (
    (2, 3),    # 3 times up
    (1, 11),   # all the way to the right
    (3, 1),    # one down
    (0, 11),   # to the left
    (3, 1),    # one down
    (1, 11),   # to the right
    (3, 1),    # the last step down
 )
 for action, repeats in route:
    for _ in range(repeats):
       total_reward = make_step(action, total_reward, env)
diff --git a/output.txt b/output.txt
 New state 4 with extra award -1 to total 0
 New state 8 with extra award -1 to total -1
 New state 12 with extra award -1 to total -2
 New state 13 with extra award -1 to total -3
 New state 14 with extra award -1 to total -4
 New state 15 with extra award -1 to total -5
 New state 16 with extra award -1 to total -6
 New state 17 with extra award -1 to total -7
 New state 18 with extra award -1 to total -8
 New state 19 with extra award -1 to total -9
 New state 20 with extra award -1 to total -10
 New state 21 with extra award -1 to total -11
 New state 22 with extra award -1 to total -12
 New state 23 with extra award -1 to total -13
 New state 19 with extra award -1 to total -14
 New state 18 with extra award -1 to total -15
 New state 17 with extra award -1 to total -16
 New state 16 with extra award -1 to total -17
 New state 15 with extra award -1 to total -18
 New state 14 with extra award -1 to total -19
 New state 13 with extra award -1 to total -20
 New state 12 with extra award -1 to total -21
 New state 11 with extra award -1 to total -22
 New state 10 with extra award -1 to total -23
 New state 9 with extra award -1 to total -24
 New state 8 with extra award -1 to total -25
 New state 4 with extra award -1 to total -26
 New state 5 with extra award -1 to total -27
 New state 6 with extra award -1 to total -28
 New state 7 with extra award -1 to total -29
 New state 8 with extra award -1 to total -30
 New state 9 with extra award -1 to total -31
 New state 10 with extra award -1 to total -32
 New state 11 with extra award -1 to total -33
 New state 12 with extra award -1 to total -34
 New state 13 with extra award -1 to total -35
 New state 14 with extra award -1 to total -36
 New state 15 with extra award -1 to total -37
 New state 11 with extra award 0 to total -38
	# Apply one action to the environment, report it, and return the new total.
	def make_step(step, episode_reward, cliff):
	    """Take a single action in ``cliff`` and return the updated episode reward.

	    Args:
	        step: Action value handed unchanged to ``cliff.step``.
	        episode_reward: Reward accumulated before this action is taken.
	        cliff: Environment object; must provide ``step(action)`` returning a
	            4-tuple whose first two items are the new state and the reward,
	            and a ``render()`` method.

	    Returns:
	        ``episode_reward`` plus the reward produced by this action.

	    The printed line shows the total as it was *before* this action's reward
	    is added.
	    """
	    # The last two items of the step result are not used here.
	    new_state, reward, _finished, _info = cliff.step(step)
	    message = f'New state {new_state} with extra award {reward} to total {episode_reward}'
	    print(message)
	    cliff.render()
	    updated_total = episode_reward + reward
	    return updated_total

	# Walk the entire grid by replaying a fixed route through the environment.
	env = gym.make('cliff-v0')
	state = env.reset()
	total_reward = 0

	# Each leg is (action, number of consecutive steps), listed in walking order.
	route = (
	    (2, 3),    # 3 times up
	    (1, 11),   # all the way to the right
	    (3, 1),    # one down
	    (0, 11),   # to the left
	    (3, 1),    # one down
	    (1, 11),   # to the right
	    (3, 1),    # the last step down
	)
	for action, repeats in route:
	    for _ in range(repeats):
	        total_reward = make_step(action, total_reward, env)
	New state 4 with extra award -1 to total 0
	New state 8 with extra award -1 to total -1
	New state 12 with extra award -1 to total -2
	New state 13 with extra award -1 to total -3
	New state 14 with extra award -1 to total -4
	New state 15 with extra award -1 to total -5
	New state 16 with extra award -1 to total -6
	New state 17 with extra award -1 to total -7
	New state 18 with extra award -1 to total -8
	New state 19 with extra award -1 to total -9
	New state 20 with extra award -1 to total -10
	New state 21 with extra award -1 to total -11
	New state 22 with extra award -1 to total -12
	New state 23 with extra award -1 to total -13
	New state 19 with extra award -1 to total -14
	New state 18 with extra award -1 to total -15
	New state 17 with extra award -1 to total -16
	New state 16 with extra award -1 to total -17
	New state 15 with extra award -1 to total -18
	New state 14 with extra award -1 to total -19
	New state 13 with extra award -1 to total -20
	New state 12 with extra award -1 to total -21
	New state 11 with extra award -1 to total -22
	New state 10 with extra award -1 to total -23
	New state 9 with extra award -1 to total -24
	New state 8 with extra award -1 to total -25
	New state 4 with extra award -1 to total -26
	New state 5 with extra award -1 to total -27
	New state 6 with extra award -1 to total -28
	New state 7 with extra award -1 to total -29
	New state 8 with extra award -1 to total -30
	New state 9 with extra award -1 to total -31
	New state 10 with extra award -1 to total -32
	New state 11 with extra award -1 to total -33
	New state 12 with extra award -1 to total -34
	New state 13 with extra award -1 to total -35
	New state 14 with extra award -1 to total -36
	New state 15 with extra award -1 to total -37
	New state 11 with extra award 0 to total -38