lorenzotinfena · April 13, 2022 17:20
diff --git a/main.py b/main.py
 import gym
 # Run a single CartPole episode driven by randomly sampled actions,
 # printing every transition and a final summary.
 # NOTE(review): env.step() is unpacked as a 4-tuple and env.reset() as a bare
 # state; presumably this targets a Gym release with that API - confirm.
 env = gym.make('CartPole-v1')
 current_state = env.reset()  # state the episode starts from
 # Episode metrics: number of transitions taken and reward accumulated.
 steps = 0
 total_reward = 0
 while True:
     # Sample an action from the environment's action space and apply it.
     action = env.action_space.sample()
     next_state, reward, done, _ = env.step(action)
     print(
         f'Transition from state {current_state} to state {next_state}, '
         f'I earned reward: {reward} and now the episode is done is {done}'
     )
     # Fold this transition into the metrics, then advance to the new state.
     steps += 1
     total_reward = total_reward + reward
     current_state = next_state
     if done:  # the environment signalled the end of the episode
         break
 print(f'Episode done in {steps} steps, total reward {total_reward}')
	import gym
	# Run one CartPole episode with randomly sampled actions, printing each
	# transition and, once the episode ends, the step count and total reward.
	# NOTE(review): env.step() is unpacked as a 4-tuple and env.reset() as a bare
	# state; presumably this targets a Gym release with that API - confirm.
	env = gym.make('CartPole-v1')
	# initialize metrics
	total_reward = 0
	steps = 0
	current_state = env.reset() # obtain first state
	done = False
	while not done: # when done is True the episode ends
	    # Everything down to the current_state update is the loop body: it must
	    # run once per environment step, so it is indented under the while.
	    action = env.action_space.sample() # get a random action from the environment's action space
	    next_state, reward, done, _ = env.step(action) # perform the action
	    print(f'Transition from state {current_state} to state {next_state}, '
	    + f'I earned reward: {reward} and now the episode is done is {done}')
	    # update metrics
	    total_reward += reward
	    steps += 1
	    current_state = next_state # update current_state
	# Printed once, after the loop has exited.
	print(f'Episode done in {steps} steps, total reward {total_reward}')