Created
June 6, 2018 02:57
-
-
Save iandanforth/bbce05af83fb482f4ffc3fb8570fe50d to your computer and use it in GitHub Desktop.
A port of the cart-pole OpenAI gym environment to Pymunk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Classic cart-pole system.
Pymunk version by Ian Danforth
"""
import math | |
import gym | |
import pygame | |
import pymunk | |
import pymunk.pygame_util | |
import numpy as np | |
from pymunk.vec2d import Vec2d | |
from gym import spaces, logger | |
from gym.utils import seeding | |
from . import cartpole_utils as utils | |
class PymunkCartPoleEnv(gym.Env):
    """Cart-pole balancing task simulated with Pymunk and drawn with pygame.

    A cart slides along a horizontal track with a pole attached to it. The
    agent applies a horizontal force to the cart through a single continuous
    action in ``[min_action, max_action]``.

    Observation (float32 array of length 4):
        0. cart x position, in Pymunk space coordinates
        1. cart x velocity
        2. pole angle in radians, wrapped to ``[-pi, pi)``
        3. pole angular velocity

    An episode ends when the cart leaves ``[0, screen_width]`` or the pole
    angle exceeds ``theta_threshold_radians`` in either direction.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50
    }

    def __init__(self):
        """Seed the RNG, build the physics world and declare the spaces."""
        # Pygame and display setup. The window is created lazily in render().
        self.screen = None
        self.draw_options = None
        self.screen_width = 600
        self.screen_height = 400
        self.clock = pygame.time.Clock()

        # seed() must run before _initPymunk(), which draws from np_random.
        self.seed()
        self._initPymunk()

        # Action Space
        # force_mag here is 50x force_mag in standard cartpole because
        # this force is divided by the frames per second when applied by pymunk
        self.force_mag = 1500.0
        self.min_action = -1.0
        self.max_action = 1.0
        self.action_space = spaces.Box(
            low=self.min_action,
            high=self.max_action,
            shape=(1,)
        )

        # Observation Space
        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4
        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds
        # NOTE(review): the x bound below (2 * x_threshold) follows the classic
        # cart-pole units, but step() reports x in space coordinates and fails
        # the episode at 0 / screen_width. Confirm which bound is intended.
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max,
            self.theta_threshold_radians * 2,
            np.finfo(np.float32).max])
        self.observation_space = spaces.Box(-high, high)

        self.steps_beyond_done = None

    def _initPymunk(self):
        """Create a fresh Pymunk space holding the track, cart and pole.

        Initial cart position/velocity and pole angle/angular velocity get
        small random perturbations drawn from ``self.np_random``.
        """
        # Simulation space
        self.space = pymunk.Space()
        self.space.gravity = (0.0, -980.0)
        self.space.iterations = 20  # Double default

        # Only the first two offsets are used below; the draw size is kept at
        # four so seeded runs consume the random stream exactly as before.
        rand_offsets = self.np_random.uniform(low=-0.5, high=0.5, size=(4,))

        # Track
        track_pos_y = 100
        # Track outside of view area
        padding = 400
        self.track_body, self.track_shape = utils.addTrack(
            self.screen_width,
            self.space,
            track_pos_y,
            padding
        )

        # Cart
        cart_width = 60
        cart_height = 30
        cart_mass = 1.0
        cart_x = (self.screen_width / 2) + rand_offsets[0]
        self.cart_body, self.cart_shape = utils.addCart(
            self.space,
            cart_width,
            cart_height,
            cart_mass,
            cart_x,
            track_pos_y
        )
        self.cart_body.velocity = Vec2d(rand_offsets[1], 0.0)

        # Pole
        pole_length = 110
        pole_mass = 0.1
        self.pole_body, self.pole_shape = utils.addPole(
            self.screen_width,
            self.space,
            pole_length,
            pole_mass,
            track_pos_y,
            cart_height
        )
        self.pole_body.angle = self.np_random.uniform(low=-0.05, high=0.05)
        self.pole_body.angular_velocity = self.np_random.uniform(
            low=-0.05,
            high=0.05
        )

        # Constraints
        self.constraints = utils.addConstraints(
            self.space,
            self.cart_shape,
            self.track_shape,
            self.pole_shape
        )

    def _get_state(self):
        """Return ``(x, x_velocity, theta, angular_velocity)`` as read now.

        ``theta`` is the pole body angle wrapped into ``[-pi, pi)``.
        """
        tau = math.pi * 2
        theta = self.pole_body.angle % tau
        if theta >= math.pi:
            theta = theta - tau
        return (
            self.cart_body.position[0],
            self.cart_body.velocity[0],
            theta,
            self.pole_body.angular_velocity,
        )

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance the simulation by one frame.

        - Take action
        - Step the physics of the world
        - Check for 'done' conditions
        - Return reward as appropriate

        Note: render() must be called at least once before
        this method is called otherwise pymunk breaks.
        # e.g. OverflowError: Python int too large to convert to C long

        Args:
            action: scalar or length-1 sequence; clipped to
                ``[min_action, max_action]`` and scaled by ``force_mag``.

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is a
            float32 array ``[x, x_velocity, theta, angular_velocity]`` taken
            after the physics step, and ``info`` is an empty dict.
        """
        # Accept both a bare scalar and a shape-(1,) array from the Box space,
        # and keep the applied force inside the declared action range.
        action_value = float(np.asarray(action, dtype=np.float64).ravel()[0])
        action_value = min(max(action_value, self.min_action), self.max_action)

        # Pymunk expects a 2-D force vector; the push is purely horizontal.
        force = (self.force_mag * action_value, 0.0)
        self.cart_body.apply_force_at_local_point(
            force,
            self.cart_body.center_of_gravity
        )

        # Step first so position, angle and velocities all describe the same
        # instant. The step length matches the 50 frames per second in metadata.
        self.space.step(1 / 50.0)

        state = self._get_state()
        x, _, theta, _ = state

        # Out of bounds failure
        done = x < 0.0 or x > self.screen_width

        # Angular failure
        if not done:
            done = theta < -self.theta_threshold_radians \
                or theta > self.theta_threshold_radians

        done = bool(done)

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("""
You are calling 'step()' even though this environment has already returned
done = True. You should always call 'reset()' once you receive 'done = True'
Any further steps are undefined behavior.
""")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(state, dtype=np.float32), reward, done, {}

    def render(self, mode='human'):
        """Draw the current state of the simulation in a pygame window.

        The window and debug-draw options are created on the first call.

        Args:
            mode: ``'human'`` just updates the window; ``'rgb_array'`` also
                returns the frame.

        Returns:
            For ``'rgb_array'``, a ``(height, width, 3)`` uint8 array of the
            rendered frame; otherwise ``None``.
        """
        if self.screen is None:
            print('Setting up screen')
            pygame.init()
            self.screen = pygame.display.set_mode(
                (self.screen_width, self.screen_height)
            )
            pygame.display.set_caption("pymunk_cartpole.py")
            # Debug draw setup (called in render())
            self.draw_options = pymunk.pygame_util.DrawOptions(self.screen)
            # Presumably DRAW_SHAPES | DRAW_CONSTRAINTS (1 | 2) - confirm
            # against the installed pymunk version.
            self.draw_options.flags = 3

        utils.handlePygameEvents()

        # Redraw all objects
        self.screen.fill((255, 255, 255))
        self.space.debug_draw(self.draw_options)
        pygame.display.flip()
        self.clock.tick(50)

        if mode == 'rgb_array':
            # surfarray is indexed (x, y, channel); swap to (row, col, channel)
            return np.transpose(
                pygame.surfarray.array3d(self.screen),
                axes=(1, 0, 2)
            )
        return None

    def reset(self):
        """Rebuild the physics world and return the initial observation.

        Returns:
            float32 array ``[x, x_velocity, theta, angular_velocity]`` for the
            freshly initialised cart and pole.
        """
        # Rebinding self.space inside _initPymunk() drops the previous space.
        self._initPymunk()
        # Clear the post-termination counter so the next episode's warning
        # and reward logic start fresh.
        self.steps_beyond_done = None
        return np.array(self._get_state(), dtype=np.float32)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment