Skip to content

Instantly share code, notes, and snippets.

@ethanabrooks
Created November 15, 2022 14:08
Show Gist options
  • Save ethanabrooks/5d4ac39e559dd50ce985dd4582d283b1 to your computer and use it in GitHub Desktop.
Save ethanabrooks/5d4ac39e559dd50ce985dd4582d283b1 to your computer and use it in GitHub Desktop.
ICPI prompt
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
assert state == 5 and state != 4
state = left()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 5 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
assert state == 5 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
assert state == 5 and state != 4
state = left()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 5 and state != 4
state = left()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 5 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
assert state == 5 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 5 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 5 and state != 4
state = left()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
assert state == 5 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 5 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 6 and state != 4
state = left()
assert reward == 0.0
assert state == 5 and state != 4
state = left()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = left()
assert reward == 0.0
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 0 and state != 4
state = left()
assert reward == 0.0
state, reward = reset()
assert state == 5 and state != 4
state = left()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 0 and state != 4
state = right()
assert reward == 0.0
assert state == 1 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 5 and state != 4
state = left()
assert reward == 0.0
assert state == 4 and state == 4
reward = try_goal(state)
assert reward == 1.0
state, reward = reset()
assert state == 1 and state != 4
state = right()
assert reward == 0.0
assert state == 2 and state != 4
state = right()
assert reward == 0.0
assert state == 3 and state != 4
state = right()
assert reward == 0.0
state, reward = reset()
assert state == -1 and state != 4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment