Skip to content

Instantly share code, notes, and snippets.

@sherjilozair
Created June 25, 2018 03:38
Show Gist options
Save sherjilozair/6a9feab865aa8b49d102c45d77dc1d57 to your computer and use it in GitHub Desktop.
import numpy as np
import itertools
import gym
import random
# Module-level environment instance; evaluate() resets and steps this same
# object for every candidate policy.
env = gym.make("BowlingNoFrameskip-v4")
def enact(step, partitions, actions):
    """Return the action scheduled for time step ``step``.

    The schedule is periodic: ``step`` is first reduced modulo the last
    entry of ``partitions``, and the action paired with the first
    boundary strictly greater than that reduced step is returned.

    Args:
        step: Integer time step.
        partitions: Sequence of boundaries. Callers in this file pass it
            sorted ascending, in which case a match is always found.
        actions: Sequence of actions, paired positionally with
            ``partitions`` (extra entries on either side are ignored).

    Returns:
        The selected action, or ``None`` if no boundary exceeds the
        reduced step (only possible when ``partitions`` is not sorted
        ascending or ``actions`` is shorter than ``partitions``).

    Raises:
        IndexError: If ``partitions`` is empty.
        ZeroDivisionError: If the last boundary is 0.
    """
    # Wrap around so the schedule repeats with period partitions[-1].
    phase = step % partitions[-1]
    for boundary, action in zip(partitions, actions):
        if phase < boundary:
            return action
    return None
def evaluate(params):
    """Run one episode under a periodic action schedule and return its return.

    Args:
        params: Two-element sequence ``[partitions, actions]``; both items
            are forwarded unchanged to ``enact`` on every step.

    Returns:
        The sum of the rewards received until the environment reports
        ``done``.
    """
    total_reward = 0
    step = 0
    done = False
    env.reset()
    while not done:
        # The counter is advanced before the lookup, so the first action
        # is chosen for step 1, not step 0.
        step += 1
        action = enact(step, params[0], params[1])
        # Unpacks the 4-tuple (observation, reward, done, info) form of
        # env.step(); observation and info are not used.
        _, reward, done, _ = env.step(action)
        total_reward += reward
    return total_reward
# Random search over periodic action schedules: sample a candidate
# ([partitions, actions]), evaluate it for one episode, and remember the
# best one seen so far. Runs until interrupted.
returns = []
optimal_value = -1
n_episodes = 0
while True:
    n_episodes += 1
    # Three boundaries drawn from {10, 20, ..., 990} and three actions
    # drawn from {0, 1, 2}.
    partitions = [random.choice(range(10, 1000, 10)) for _ in range(3)]
    actions = [random.choice(range(3)) for _ in range(3)]
    # enact() scans boundaries in order, so they must be ascending.
    partitions.sort()
    # Candidates that never use action 1 are skipped without evaluation
    # (presumably because action 1 is what releases the ball -- confirm
    # against the environment's action meanings). Skipped candidates still
    # count towards n_episodes.
    if 1 not in actions:
        continue
    # flush=True makes the progress text visible while the (slow)
    # evaluation runs; without it a line-buffered stream holds the text
    # back until the newline printed afterwards.
    print(n_episodes, "evaluating...", [partitions, actions], '...',
          end='', flush=True)
    val = evaluate([partitions, actions])
    if val > optimal_value:
        optimal_value = val
        optimal_policy = [partitions, actions]
    print("has value", val, '; optimal:', optimal_value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment