# GitHub Gist by @iandanforth, created June 6, 2018 02:57
# Gist: iandanforth/bbce05af83fb482f4ffc3fb8570fe50d
# A port of the cart-pole OpenAI gym environment to Pymunk
"""
Classic cart-pole system.
Pymunk version by Ian Danforth
"""
import math
import gym
import pygame
import pymunk
import pymunk.pygame_util
import numpy as np
from pymunk.vec2d import Vec2d
from gym import spaces, logger
from gym.utils import seeding
from . import cartpole_utils as utils
class PymunkCartPoleEnv(gym.Env):
    """Cart-pole balancing task simulated with Pymunk and drawn with Pygame.

    A cart slides along a horizontal track with a pole hinged on top of it.
    Each step applies a horizontal force to the cart, proportional to a
    continuous action in [-1, 1]. An episode ends when the cart leaves the
    visible screen or the pole tilts more than 12 degrees from vertical.

    Observations are 4-tuples:
    ``(cart x, cart x velocity, pole angle, pole angular velocity)``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50
    }

    def __init__(self):
        """Create the physics world and define the action/observation spaces."""
        # Pygame and display setup. The window itself is created lazily on
        # the first call to render().
        self.screen = None
        self.draw_options = None
        self.screen_width = 600
        self.screen_height = 400
        self.clock = pygame.time.Clock()

        # The RNG must exist before _initPymunk() draws the initial state.
        self.seed()
        self._initPymunk()

        # Action Space
        # force_mag here is 50x force_mag in standard cartpole because
        # this force is divided by the frames per second when applied by pymunk
        self.force_mag = 1500.0
        self.min_action = -1.0
        self.max_action = 1.0
        self.action_space = spaces.Box(
            low=self.min_action,
            high=self.max_action,
            shape=(1,)
        )

        # Observation Space
        # Angle at which to fail the episode (12 degrees, in radians)
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds
        # NOTE(review): the x bound below is +/- 4.8, but step() reports the
        # cart x straight from pymunk and fails the episode against
        # screen_width (600), so reported x values fall outside this Box.
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max,
            self.theta_threshold_radians * 2,
            np.finfo(np.float32).max])
        self.observation_space = spaces.Box(-high, high)

        # None while the episode is live; counts step() calls made after
        # `done` was first returned.
        self.steps_beyond_done = None

    def _initPymunk(self):
        """Build a fresh pymunk space with track, cart, pole and constraints.

        The initial cart position/velocity and pole angle/angular velocity
        are perturbed with small draws from ``self.np_random``.
        """
        # Simulation space
        self.space = pymunk.Space()
        self.space.gravity = (0.0, -980.0)
        self.space.iterations = 20  # Double default

        # Only the first two offsets are used, but the draw size is kept at 4
        # so that seeded runs consume the random stream exactly as before.
        rand_offsets = self.np_random.uniform(low=-0.5, high=0.5, size=(4,))

        # Track
        track_pos_y = 100
        # Track outside of view area
        padding = 400
        self.track_body, self.track_shape = utils.addTrack(
            self.screen_width,
            self.space,
            track_pos_y,
            padding
        )

        # Cart: starts near the horizontal centre of the screen
        cart_width = 60
        cart_height = 30
        cart_mass = 1.0
        cart_x = (self.screen_width / 2) + rand_offsets[0]
        self.cart_body, self.cart_shape = utils.addCart(
            self.space,
            cart_width,
            cart_height,
            cart_mass,
            cart_x,
            track_pos_y
        )
        self.cart_body.velocity = Vec2d(rand_offsets[1], 0.0)

        # Pole
        pole_length = 110
        pole_mass = 0.1
        self.pole_body, self.pole_shape = utils.addPole(
            self.screen_width,
            self.space,
            pole_length,
            pole_mass,
            track_pos_y,
            cart_height
        )
        self.pole_body.angle = self.np_random.uniform(low=-0.05, high=0.05)
        self.pole_body.angular_velocity = self.np_random.uniform(
            low=-0.05,
            high=0.05
        )

        # Constraints
        self.constraints = utils.addConstraints(
            self.space,
            self.cart_shape,
            self.track_shape,
            self.pole_shape
        )

    def _wrapped_pole_angle(self):
        """Return the pole angle wrapped into the interval [-pi, pi)."""
        tau = math.pi * 2
        theta = self.pole_body.angle % tau
        if theta >= math.pi:
            theta -= tau
        return theta

    def seed(self, seed=None):
        """Seed the environment's random number generator.

        Returns:
            A one-element list holding the seed that was actually used.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Apply `action`, advance the physics by 1/50 s and report the result.

        - Take action
        - Step the physics of the world
        - Check for 'done' conditions
        - Return reward as appropriate

        Note: render() must be called at least once before
        this method is called otherwise pymunk breaks.
        # e.g. OverflowError: Python int too large to convert to C long

        Args:
            action: Force scale; a scalar or a one-element sequence/array.
                It is multiplied by ``force_mag`` and applied horizontally.

        Returns:
            ``(obs, reward, done, info)`` where ``obs`` is the tuple
            ``(x, x velocity, pole angle, pole angular velocity)`` and
            ``info`` is an empty dict.
        """
        # Build an explicit 2-D force vector (horizontal only) so that both
        # scalar and shape-(1,) actions give pymunk a well-formed force.
        scale = float(np.asarray(action, dtype=float).ravel()[0])
        force = (self.force_mag * scale, 0.0)
        self.cart_body.apply_force_at_local_point(
            force,
            self.cart_body.center_of_gravity
        )

        # NOTE(review): x and theta are sampled before the physics step while
        # the velocities below are sampled after it; kept as-is so existing
        # callers see the same observations.
        theta = self._wrapped_pole_angle()
        x = self.cart_body.position[0]

        # Out of bounds failure, or angular failure
        out_of_bounds = x < 0.0 or x > self.screen_width
        pole_fell = abs(theta) > self.theta_threshold_radians
        done = bool(out_of_bounds or pole_fell)

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "\n"
                    "You are calling 'step()' even though this environment "
                    "has already returned\n"
                    "done = True. You should always call 'reset()' once you "
                    "receive 'done = True'\n"
                    "Any further steps are undefined behavior.\n"
                )
            self.steps_beyond_done += 1
            reward = 0.0

        self.space.step(1 / 50.0)

        obs = (
            x,
            self.cart_body.velocity[0],
            theta,
            self.pole_body.angular_velocity
        )
        return obs, reward, done, {}

    def render(self, mode='human'):
        """Draw the current state of the simulation in a Pygame window.

        The window and the pymunk debug-draw options are created on the
        first call. Each call is throttled to 50 frames per second.

        Args:
            mode: 'human' to only update the window; 'rgb_array' to also
                return the frame.

        Returns:
            For ``mode='rgb_array'``, an array of shape
            (screen_height, screen_width, 3); otherwise None.
        """
        if self.screen is None:
            print('Setting up screen')
            pygame.init()
            self.screen = pygame.display.set_mode(
                (self.screen_width, self.screen_height)
            )
            pygame.display.set_caption("pymunk_cartpole.py")
            # Debug draw setup (called in render())
            self.draw_options = pymunk.pygame_util.DrawOptions(self.screen)
            # presumably DRAW_SHAPES | DRAW_CONSTRAINTS (1 | 2), i.e. no
            # collision points -- confirm against the pymunk docs
            self.draw_options.flags = 3

        utils.handlePygameEvents()

        # Redraw all objects
        self.screen.fill((255, 255, 255))
        self.space.debug_draw(self.draw_options)
        pygame.display.flip()
        self.clock.tick(50)

        if mode == 'rgb_array':
            # surfarray is indexed (x, y, channel); swap to (row, col, channel)
            frame = pygame.surfarray.array3d(self.screen)
            return np.transpose(frame, (1, 0, 2))
        return None

    def reset(self):
        """Start a new episode and return its initial observation.

        Rebuilds the whole pymunk space (dropping the previous one) and
        clears the post-termination step counter so that the next episode's
        terminal step is rewarded and does not trigger the "already done"
        warning.

        Returns:
            The initial observation tuple
            ``(x, x velocity, pole angle, pole angular velocity)``.
        """
        # _initPymunk() rebinds self.space, which releases the old space.
        self._initPymunk()
        self.steps_beyond_done = None
        return (
            self.cart_body.position[0],
            self.cart_body.velocity[0],
            self._wrapped_pole_angle(),
            self.pole_body.angular_velocity
        )
# (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")