harshkumarchourasia · October 25, 2024 12:37
diff --git a/record_episode.py b/record_episode.py
 import gym
 
 # Initialize the CartPole environment with rendering mode set to 'rgb_array'
 # so that the RecordVideo wrapper below has frames to capture.
 env = gym.make('CartPole-v0', render_mode="rgb_array")
 
 # Wrap the environment with the RecordVideo wrapper to record videos into ./vid.
 # The episode_trigger lambda selects every 10th episode (ids 0, 10, 20, ...).
 env = gym.wrappers.RecordVideo(env, "./vid", episode_trigger=lambda episode_id: episode_id % 10 == 0)
 
 try:
     # Simulate 100 episodes
     for _ in range(100):
         # Reset at the START of each episode rather than after it: a reset
         # issued after the last episode would begin episode 100, which matches
         # the trigger above and would leave a stray recording of an episode
         # that is never played.
         observation, info = env.reset()
 
         while True:
             # Sample a random action from the environment's action space
             # (an agent's policy can replace this for specific decision-making logic)
             action = env.action_space.sample()
 
             # Step the environment using the sampled action
             observation, reward, terminated, truncated, info = env.step(action)
 
             # Leave the inner loop once the episode is complete
             if terminated or truncated:
                 break
 finally:
     # Always close the environment to free resources, even if an episode
     # raised part-way through.
     env.close()
	import gym

	# Initialize the CartPole environment with rendering mode set to 'rgb_array'
	# so that the RecordVideo wrapper below has frames to capture.
	env = gym.make('CartPole-v0', render_mode="rgb_array")

	# Wrap the environment with the RecordVideo wrapper to record videos into ./vid.
	# The episode_trigger lambda selects every 10th episode (ids 0, 10, 20, ...).
	env = gym.wrappers.RecordVideo(env, "./vid", episode_trigger=lambda episode_id: episode_id % 10 == 0)

	try:
		# Simulate 100 episodes
		for _ in range(100):
			# Reset at the START of each episode rather than after it: a reset
			# issued after the last episode would begin episode 100, which matches
			# the trigger above and would leave a stray recording of an episode
			# that is never played.
			observation, info = env.reset()

			while True:
				# Sample a random action from the environment's action space
				# (an agent's policy can replace this for specific decision-making logic)
				action = env.action_space.sample()

				# Step the environment using the sampled action
				observation, reward, terminated, truncated, info = env.step(action)

				# Leave the inner loop once the episode is complete
				if terminated or truncated:
					break
	finally:
		# Always close the environment to free resources, even if an episode
		# raised part-way through.
		env.close()