mks-m · September 29, 2016 00:44
diff --git a/cart_pole.py b/cart_pole.py
 import gym
 import numpy as np
 import math

 def atg01(x):
     """Squash a real number onto the 0..1 scale using the arctangent.

     Returns 0.5 for x == 0, tends towards 0 for large negative x and
     towards 1 for large positive x.
     """
     angle = math.atan(x)
     return angle / math.pi + 0.5

 # Random-perturbation search for a linear CartPole policy: every episode
 # tries weights near the best-known ones, then blends them into the
 # best-known weights according to how the episode's score compares with
 # the best score seen so far.
 env = gym.make('CartPole-v0')

 # Best episode score so far. It starts at 1 and is only ever replaced by a
 # score that is at least as large, so the division below never hits zero.
 best = 1
 # Policy weights, one per observation component, drawn uniformly from [-1, 1).
 best_cs = (np.random.rand(4) * 2 - 1)
 # Scale of the random perturbation applied to best_cs for the next episode.
 learn_rate = 0.1

 # range() rather than xrange(): xrange does not exist on Python 3, while
 # range() gives the same 200 iterations on both Python 2 and Python 3.
 for _ in range(200):
     # The observation returned by reset() is discarded, so the first action
     # of each episode is a random sample; later actions come from the policy.
     env.reset()
     current = 0
     current_act = env.action_space.sample()
     current_cs = best_cs + (np.random.rand(4) * 2 - 1) * learn_rate

     tries = 0
     done = False
     while not done and tries < 1000:
         state, inc, done, _ = env.step(current_act)
         # Squash the weighted observation with atg01, then round it to
         # pick action 0 or 1.
         current_dot = atg01(np.dot(state, current_cs))
         current_act = int(round(current_dot))
         current += inc
         tries += 1
         # env.render()

     # Ratio of this episode's score to the best score, capped at 1.
     proximity = float(current) / float(best)
     if proximity >= 1:
         best = current
         proximity = 1.0

     # Cube the ratio so that episodes scoring well below the best have
     # little influence on the blended weights.
     proximity *= proximity * proximity
     # The further this episode fell short, the larger the next perturbation.
     learn_rate = 1.0 - proximity
     best_cs = current_cs * proximity + best_cs * (1.0 - proximity)

     print (current, best, "%.2f" % learn_rate, best_cs)
	import gym
	import numpy as np
	import math

	def atg01(x):
	    """Squash a real number onto the 0..1 scale using the arctangent.

	    Returns 0.5 for x == 0, tends towards 0 for large negative x and
	    towards 1 for large positive x.
	    """
	    # The body must be indented under the def; with `return` at the same
	    # column as `def` the function does not parse.
	    return 0.5 + math.atan(x) / math.pi

	# Random-perturbation search for a linear CartPole policy: every episode
	# tries weights near the best-known ones, then blends them into the
	# best-known weights according to how the episode's score compares with
	# the best score seen so far.
	env = gym.make('CartPole-v0')

	# Best episode score so far. It starts at 1 and is only ever replaced by a
	# score that is at least as large, so the division below never hits zero.
	best = 1
	# Policy weights, one per observation component, drawn uniformly from [-1, 1).
	best_cs = (np.random.rand(4) * 2 - 1)
	# Scale of the random perturbation applied to best_cs for the next episode.
	learn_rate = 0.1

	# range() rather than xrange(): xrange does not exist on Python 3, while
	# range() gives the same 200 iterations on both Python 2 and Python 3.
	for _ in range(200):
	    # The observation returned by reset() is discarded, so the first action
	    # of each episode is a random sample; later actions come from the policy.
	    env.reset()
	    current = 0
	    current_act = env.action_space.sample()
	    current_cs = best_cs + (np.random.rand(4) * 2 - 1) * learn_rate

	    tries = 0
	    done = False
	    while not done and tries < 1000:
	        state, inc, done, _ = env.step(current_act)
	        # Squash the weighted observation with atg01, then round it to
	        # pick action 0 or 1.
	        current_dot = atg01(np.dot(state, current_cs))
	        current_act = int(round(current_dot))
	        current += inc
	        tries += 1
	        # env.render()

	    # Ratio of this episode's score to the best score, capped at 1.
	    proximity = float(current) / float(best)
	    if proximity >= 1:
	        best = current
	        proximity = 1.0

	    # Cube the ratio so that episodes scoring well below the best have
	    # little influence on the blended weights. (The multiplication
	    # operators had been lost from this line, leaving it unparseable.)
	    proximity *= proximity * proximity
	    # The further this episode fell short, the larger the next perturbation.
	    learn_rate = 1.0 - proximity
	    best_cs = current_cs * proximity + best_cs * (1.0 - proximity)

	    print (current, best, "%.2f" % learn_rate, best_cs)