MikeShi42 · September 18, 2018 02:50
diff --git a/main.py b/main.py
 import gym
 import numpy as np

 # Module-level CartPole-v1 environment instance built with gym.make.
 # NOTE(review): presumably this is the env handed to play(); the call site
 # is not visible here -- confirm.
 env = gym.make('CartPole-v1')

 def play(env, policy, max_steps=5000):
   """Run one episode with a linear policy; return its score and observations.

   On every step the action is 1 when ``np.dot(policy, observation)`` is
   positive and 0 otherwise.

   Args:
     env: Object exposing ``reset()`` and ``step(action)``. Both the classic
       Gym API (``reset()`` returns the observation, ``step`` returns a
       4-tuple) and the Gym >= 0.26 / Gymnasium API (``reset()`` returns
       ``(observation, info)``, ``step`` returns a 5-tuple) are accepted.
     policy: Weights dotted with each observation to pick the action.
     max_steps: Upper bound on loop iterations. Defaults to 5000, the value
       that used to be hard-coded.

   Returns:
     A ``(score, observations)`` tuple: the summed reward, and the list of
     observations seen, each converted with ``.tolist()``.
   """
   reset_result = env.reset()
   # Newer Gym / Gymnasium return (observation, info) from reset().
   if isinstance(reset_result, tuple):
     observation = reset_result[0]
   else:
     observation = reset_result

   done = False
   score = 0
   observations = []

   for _ in range(max_steps):
     # Record the observations for normalization and replay. Recording
     # happens before the `done` check so the terminal observation is kept.
     observations.append(observation.tolist())

     if done:  # The simulation ended on the previous iteration.
       break

     # Pick an action according to the policy matrix.
     outcome = np.dot(policy, observation)
     action = 1 if outcome > 0 else 0

     # Make the action, record reward.
     step_result = env.step(action)
     if len(step_result) == 5:
       # 5-tuple API: the episode is over when it terminated or was truncated.
       observation, reward, terminated, truncated, _ = step_result
       done = terminated or truncated
     else:
       observation, reward, done, _ = step_result
     score += reward

   return score, observations
	import gym
	import numpy as np

	# Module-level CartPole-v1 environment instance built with gym.make.
	# NOTE(review): presumably this is the env handed to play(); the call site
	# is not visible here -- confirm.
	env = gym.make('CartPole-v1')

	def play(env, policy, max_steps=5000):
	  """Run one episode with a linear policy; return its score and observations.

	  On every step the action is 1 when ``np.dot(policy, observation)`` is
	  positive and 0 otherwise.

	  Args:
	    env: Object exposing ``reset()`` and ``step(action)``. Both the classic
	      Gym API (``reset()`` returns the observation, ``step`` returns a
	      4-tuple) and the Gym >= 0.26 / Gymnasium API (``reset()`` returns
	      ``(observation, info)``, ``step`` returns a 5-tuple) are accepted.
	    policy: Weights dotted with each observation to pick the action.
	    max_steps: Upper bound on loop iterations. Defaults to 5000, the value
	      that used to be hard-coded.

	  Returns:
	    A ``(score, observations)`` tuple: the summed reward, and the list of
	    observations seen, each converted with ``.tolist()``.
	  """
	  reset_result = env.reset()
	  # Newer Gym / Gymnasium return (observation, info) from reset().
	  if isinstance(reset_result, tuple):
	    observation = reset_result[0]
	  else:
	    observation = reset_result

	  done = False
	  score = 0
	  observations = []

	  for _ in range(max_steps):
	    # Record the observations for normalization and replay. Recording
	    # happens before the `done` check so the terminal observation is kept.
	    observations.append(observation.tolist())

	    if done:  # The simulation ended on the previous iteration.
	      break

	    # Pick an action according to the policy matrix.
	    outcome = np.dot(policy, observation)
	    action = 1 if outcome > 0 else 0

	    # Make the action, record reward.
	    step_result = env.step(action)
	    if len(step_result) == 5:
	      # 5-tuple API: the episode is over when it terminated or was truncated.
	      observation, reward, terminated, truncated, _ = step_result
	      done = terminated or truncated
	    else:
	      observation, reward, done, _ = step_result
	    score += reward

	  return score, observations
No results found