Skip to content

Instantly share code, notes, and snippets.

@AndyNovo
Created May 7, 2016 18:57
Show Gist options
  • Save AndyNovo/1c88c0f9b7d96bf7d8798dc08a6c6560 to your computer and use it in GitHub Desktop.
Save AndyNovo/1c88c0f9b7d96bf7d8798dc08a6c6560 to your computer and use it in GitHub Desktop.
import gym
# Create the CartPole environment and take one initial observation; its
# components seed the running per-dimension upper and lower bounds.
env = gym.make('CartPole-v0')
obs = env.reset()
maxs = list(obs)
mins = list(obs)
def strat(style):
    """Play one episode with a fixed action pattern and widen the bounds.

    ``style`` selects the action issued at step number ``t``:

    * ``"1"`` -- always action 1
    * ``"a"`` -- alternate ``t % 2`` (0, 1, 0, 1, ...)
    * ``"r"`` -- sample from ``env.action_space``
    * anything else -- always action 0

    After every step that does not end the episode, each component of the
    observation is compared against the module-level ``maxs`` / ``mins``
    lists, which are updated in place.  The observation returned together
    with the done flag is not folded into the bounds.  The episode is cut
    off after at most 1000 steps.  Returns ``None``.
    """
    policies = {
        "1": lambda t: 1,
        "a": lambda t: t % 2,
        "r": lambda t: env.action_space.sample(),
    }
    # Unknown styles fall back to "always 0", the same as the usual "0".
    pick = policies.get(style, lambda t: 0)

    env.reset()
    for step in range(1000):
        # The info value gets its own name so it cannot clobber the counter.
        obs, _reward, done, _info = env.step(pick(step))
        if done:
            return
        # maxs/mins are mutated in place, so no ``global`` statement is needed.
        for k, value in enumerate(obs):
            if value > maxs[k]:
                maxs[k] = value
            if value < mins[k]:
                mins[k] = value
# Explore with each fixed strategy in turn (2000 episodes apiece), in the
# order "0", "a", "1", "r", letting strat() widen maxs/mins as it goes.
for style in ("0", "a", "1", "r"):
    for _ in range(2000):
        strat(style)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(maxs)
print(mins)
def test(wts, maxrounds=300):
    """Run one episode under a linear threshold policy; return its reward.

    At each step every observation component is multiplied by the matching
    weight in ``wts``, divided by the matching entry of the module-level
    ``maxs`` list, and the results are summed; action 1 is taken when the
    sum is positive and action 0 otherwise.

    Args:
        wts: one weight per observation component (indexed by position).
        maxrounds: maximum number of steps to play.

    Returns:
        The sum of the rewards received up to and including the step on
        which the environment reported done, or after ``maxrounds`` steps.
    """
    total = 0
    obs = env.reset()
    for _ in range(maxrounds):
        # Index wts/maxs by position so a too-short list still raises.
        score = sum(wts[k] * x / maxs[k] for k, x in enumerate(obs))
        obs, reward, done, _info = env.step(1 if score > 0 else 0)
        total += reward
        if done:
            break
    return total
import itertools
import random

# Grid search over the four policy weights: every combination of eleven
# values (-1 upward in steps of .2) is scored by the summed reward of three
# episodes, and the best-scoring combination so far is kept in ``wts``.
themax = 0.0
roughs = [-1 + .2*i for i in range(11)]
random.shuffle(roughs)  # randomises the order in which the grid is visited
# product() varies the last position fastest, matching four nested loops.
for candidate in itertools.product(roughs, repeat=4):
    rwd = sum(test(list(candidate)) for _ in range(3))
    if rwd > themax:
        wts = list(candidate)
        themax = rwd
        # %-formatting keeps the output identical on Python 2 and Python 3.
        print("%s %s" % (rwd, wts))
# Re-evaluate the winner over 100 episodes capped at 200 steps each, then
# report the mean reward followed by the weights.
trial = [test(wts, 200) for _ in range(100)]
print(sum(trial) / len(trial))
print(wts)
def ai():
    """Play one episode with the module-level ``wts`` and return its reward.

    The policy and reward bookkeeping are exactly what ``test`` implements
    (weighted, ``maxs``-scaled observation sum thresholded at zero), capped
    at 200 steps, so the episode is delegated to ``test`` instead of being
    duplicated here.

    Returns:
        The total reward collected over the episode.
    """
    return test(wts, 200)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment