klosowsk · July 2, 2016 02:57
diff --git a/CartPole-v0_ilovelinux_reproduced_klosowski.py b/CartPole-v0_ilovelinux_reproduced_klosowski.py
 # CartPole-v0

 # A pole is attached by an un-actuated joint to a cart,
 # which moves along a frictionless track. The system 
 # is controlled by applying a force of +1 or -1 to the 
 # cart. The pendulum starts upright, and the goal is to 
 # prevent it from falling over. A reward of +1 is provided 
 # for every timestep that the pole remains upright. 
 # The episode ends when the pole is more than 15 degrees 
 # from vertical, or the cart moves more than 2.4 units from 
 # the center.

 import gym
 import numpy

 env = gym.make('CartPole-v0')

 # Linear policy: four random weights drawn uniformly from [-0.5, 0.5).
 # Their dot product with the current observation decides the action.
 param = numpy.random.rand(4) - 0.5

 # Start the monitor, writing to /tmp/OpenAI-CartPole.
 # NOTE(review): force=True presumably overwrites earlier recordings in
 # that directory -- confirm against the installed gym version.
 env.monitor.start('/tmp/OpenAI-CartPole', force=True)

 best_reward = 0

 # try/finally guarantees the monitor is closed even if an episode raises
 # or the run is interrupted part-way through.
 try:
 	for _ in range(1000):
 		observation = env.reset()
 		total_reward = 0

 		# Play one episode until the environment reports it is done.
 		while True:
 			# Action 1 when the weighted sum is positive, otherwise action 0.
 			action = 1 if numpy.dot(param, observation) > 0 else 0
 			observation, reward, done, info = env.step(action)
 			total_reward += reward
 			if done:
 				break

 		# Adjust the weights using the final observation of the episode.
 		if total_reward > best_reward:
 			# New best score: remember it and add the final observation.
 			best_reward = total_reward
 			param += observation
 		elif total_reward < 200:
 			# Not a new best and below 200: subtract the final observation.
 			# NOTE(review): 200 looks like the per-episode reward cap for
 			# CartPole-v0 -- confirm.
 			param -= observation
 finally:
 	env.monitor.close()

 # Code based on the code posted by @ilovelinux
	# CartPole-v0

	# A pole is attached by an un-actuated joint to a cart,
	# which moves along a frictionless track. The system
	# is controlled by applying a force of +1 or -1 to the
	# cart. The pendulum starts upright, and the goal is to
	# prevent it from falling over. A reward of +1 is provided
	# for every timestep that the pole remains upright.
	# The episode ends when the pole is more than 15 degrees
	# from vertical, or the cart moves more than 2.4 units from
	# the center.

	import gym
	import numpy

	env = gym.make('CartPole-v0')

	# Linear policy: four random weights drawn uniformly from [-0.5, 0.5).
	# Their dot product with the current observation decides the action.
	param = numpy.random.rand(4) - 0.5

	# Start the monitor, writing to /tmp/OpenAI-CartPole.
	# NOTE(review): force=True presumably overwrites earlier recordings in
	# that directory -- confirm against the installed gym version.
	env.monitor.start('/tmp/OpenAI-CartPole', force=True)

	best_reward = 0

	# try/finally guarantees the monitor is closed even if an episode raises
	# or the run is interrupted part-way through.
	try:
		for _ in range(1000):
			observation = env.reset()
			total_reward = 0

			# Play one episode until the environment reports it is done.
			while True:
				# Action 1 when the weighted sum is positive, otherwise action 0.
				action = 1 if numpy.dot(param, observation) > 0 else 0
				observation, reward, done, info = env.step(action)
				total_reward += reward
				if done:
					break

			# Adjust the weights using the final observation of the episode.
			if total_reward > best_reward:
				# New best score: remember it and add the final observation.
				best_reward = total_reward
				param += observation
			elif total_reward < 200:
				# Not a new best and below 200: subtract the final observation.
				# NOTE(review): 200 looks like the per-episode reward cap for
				# CartPole-v0 -- confirm.
				param -= observation
	finally:
		env.monitor.close()

	# Code based on the code posted by @ilovelinux