iandanforth · June 6, 2018 02:57
diff --git a/pymunk_cartpole.py b/pymunk_cartpole.py
 """
 Classic cart-pole system.

 Pymunk version by Ian Danforth
 """

 import math
 import gym
 import pygame
 import pymunk
 import pymunk.pygame_util
 import numpy as np
 from pymunk.vec2d import Vec2d
 from gym import spaces, logger
 from gym.utils import seeding

 from . import cartpole_utils as utils


 class PymunkCartPoleEnv(gym.Env):
    """
    Cart-pole balancing environment simulated with pymunk and drawn with
    pygame.

    Actions are a single continuous value (a Box of shape (1,) between
    min_action and max_action) which is scaled by force_mag and applied to
    the cart as a horizontal force.

    Observations are the tuple
    (cart x position, cart x velocity, pole angle, pole angular velocity).

    NOTE(review): the cart x position is read straight from the pymunk body
    (the cart is created near screen_width / 2), whereas observation_space
    bounds x by x_threshold * 2 = 4.8. Confirm which unit is intended
    before relying on observation_space.contains().
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50
    }

    def __init__(self):
        # Pygame and display setup. The window itself is created lazily by
        # render(), so screen and draw_options start out unset.
        self.screen = None
        self.draw_options = None
        self.screen_width = 600
        self.screen_height = 400
        self.clock = pygame.time.Clock()

        # The RNG has to exist before _initPymunk(), which draws the random
        # initial state from self.np_random.
        self.seed()

        self._initPymunk()

        # Action Space

        # force_mag here is 50x force_mag in standard cartpole because
        # this force is divided by the frames per second when applied by pymunk
        self.force_mag = 1500.0
        self.min_action = -1.0
        self.max_action = 1.0
        self.action_space = spaces.Box(
            low=self.min_action,
            high=self.max_action,
            shape=(1,)
        )

        # Observation Space
        # Angle at which to fail the episode (12 degrees, in radians)
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max,
            self.theta_threshold_radians * 2,
            np.finfo(np.float32).max])

        self.observation_space = spaces.Box(-high, high)

        # None until an episode has ended; afterwards counts the step()
        # calls made past the terminal step. reset() clears it again.
        self.steps_beyond_done = None

    def _initPymunk(self):
        """
        Build a fresh pymunk space holding the track, cart, pole and their
        constraints, with a small random initial state drawn from
        self.np_random.
        """
        # Simulation space
        self.space = pymunk.Space()
        self.space.gravity = (0.0, -980.0)
        self.space.iterations = 20  # Double default
        # Only the first two offsets are used (cart position and velocity).
        # The draw size is left at 4 so seeded runs keep the same RNG stream.
        rand_offsets = self.np_random.uniform(low=-0.5, high=0.5, size=(4,))

        # Track
        track_pos_y = 100
        # Track outside of view area
        padding = 400
        self.track_body, self.track_shape = utils.addTrack(
            self.screen_width,
            self.space,
            track_pos_y,
            padding
        )

        # Cart
        cart_width = 60
        cart_height = 30
        cart_mass = 1.0
        cart_x = (self.screen_width / 2) + rand_offsets[0]
        self.cart_body, self.cart_shape = utils.addCart(
            self.space,
            cart_width,
            cart_height,
            cart_mass,
            cart_x,
            track_pos_y
        )
        self.cart_body.velocity = Vec2d(rand_offsets[1], 0.0)

        # Pole
        pole_length = 110
        pole_mass = 0.1
        self.pole_body, self.pole_shape = utils.addPole(
            self.screen_width,
            self.space,
            pole_length,
            pole_mass,
            track_pos_y,
            cart_height
        )
        self.pole_body.angle = self.np_random.uniform(low=-0.05, high=0.05)
        self.pole_body.angular_velocity = self.np_random.uniform(
            low=-0.05,
            high=0.05
        )

        # Constraints
        self.constraints = utils.addConstraints(
            self.space,
            self.cart_shape,
            self.track_shape,
            self.pole_shape
        )

    def seed(self, seed=None):
        """
        (Re)create self.np_random from `seed` and return the seed actually
        used, wrapped in a list.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _pole_angle(self):
        """Return the pole angle wrapped into the range [-pi, pi)."""
        tau = math.pi * 2
        theta = self.pole_body.angle % tau
        if theta >= math.pi:
            theta = theta - tau
        return theta

    def _observation(self):
        """Return the current state in the same layout step() reports."""
        return (
            self.cart_body.position[0],
            self.cart_body.velocity[0],
            self._pole_angle(),
            self.pole_body.angular_velocity
        )

    def step(self, action):
        """
         - Take action
         - Step the physics of the world
         - Check for 'done' conditions
         - Return reward as appropriate

        `action` may be the shape-(1,) array described by action_space or a
        bare number; only its first component is used.

        Returns the tuple (obs, reward, done, info), where obs is
        (cart x, cart x velocity, pole angle, pole angular velocity) and
        info is an empty dict.

        The cart position and pole angle, both for the 'done' check and in
        the returned observation, are sampled before the physics step; the
        two velocities are sampled after it.

        Note: render() must be called at least once before
        this method is called otherwise pymunk breaks.
        # e.g. OverflowError: Python int too large to convert to C long
        """
        # Scale first and extract afterwards so an array action keeps the
        # precision of the original array multiplication.
        force_x = float(np.ravel(self.force_mag * np.asarray(action))[0])
        # The push is purely horizontal; spell out the zero y component
        # instead of handing pymunk a one-component vector.
        self.cart_body.apply_force_at_local_point(
            (force_x, 0.0),
            self.cart_body.center_of_gravity
        )
        theta = self._pole_angle()
        x = self.cart_body.position[0]
        # Out of bounds failure
        done = x < 0.0 or x > self.screen_width
        # Angular failure
        if not done:
            done = theta < -self.theta_threshold_radians \
                   or theta > self.theta_threshold_radians
        done = bool(done)

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("""
 You are calling 'step()' even though this environment has already returned
 done = True. You should always call 'reset()' once you receive 'done = True'
 Any further steps are undefined behavior.
                """)
            self.steps_beyond_done += 1
            reward = 0.0

        self.space.step(1 / 50.0)

        cart_x_velocity = self.cart_body.velocity[0]
        pole_ang_velocity = self.pole_body.angular_velocity
        obs = (
            x,
            cart_x_velocity,
            theta,
            pole_ang_velocity
        )

        return obs, reward, done, {}

    def render(self, mode='human'):
        """
        Draw the current simulation state into the pygame window, creating
        the window on the first call.

        Returns None for mode='human'. For mode='rgb_array' the drawn frame
        is also returned as a (height, width, 3) array.
        """
        if self.screen is None:
            print('Setting up screen')
            pygame.init()
            self.screen = pygame.display.set_mode(
                (self.screen_width, self.screen_height)
            )
            pygame.display.set_caption("pymunk_cartpole.py")
            # Debug draw setup (called in render())
            self.draw_options = pymunk.pygame_util.DrawOptions(self.screen)
            self.draw_options.flags = 3

        utils.handlePygameEvents()

        # Redraw all objects
        self.screen.fill((255, 255, 255))
        self.space.debug_draw(self.draw_options)
        pygame.display.flip()
        self.clock.tick(50)

        if mode == 'rgb_array':
            # surfarray is indexed [x][y][rgb]; swap the first two axes so
            # rows come first.
            return np.transpose(
                pygame.surfarray.array3d(self.screen),
                (1, 0, 2)
            )

    def reset(self):
        """
        Start a new episode: rebuild the simulation with a fresh random
        initial state and return the initial observation.
        """
        # Rebinding self.space inside _initPymunk() drops the old space.
        self._initPymunk()
        # Without this the terminal step of every later episode would be
        # treated as a step taken after 'done' (warning + zero reward).
        self.steps_beyond_done = None
        return self._observation()
	"""
	Classic cart-pole system.

	Pymunk version by Ian Danforth
	"""

	import math
	import gym
	import pygame
	import pymunk
	import pymunk.pygame_util
	import numpy as np
	from pymunk.vec2d import Vec2d
	from gym import spaces, logger
	from gym.utils import seeding

	from . import cartpole_utils as utils


	class PymunkCartPoleEnv(gym.Env):
	    """
	    Cart-pole balancing environment simulated with pymunk and drawn with
	    pygame.

	    Actions are a single continuous value (a Box of shape (1,) between
	    min_action and max_action) which is scaled by force_mag and applied to
	    the cart as a horizontal force.

	    Observations are the tuple
	    (cart x position, cart x velocity, pole angle, pole angular velocity).

	    NOTE(review): the cart x position is read straight from the pymunk body
	    (the cart is created near screen_width / 2), whereas observation_space
	    bounds x by x_threshold * 2 = 4.8. Confirm which unit is intended
	    before relying on observation_space.contains().
	    """

	    metadata = {
	        'render.modes': ['human', 'rgb_array'],
	        'video.frames_per_second': 50
	    }

	    def __init__(self):
	        # Pygame and display setup. The window itself is created lazily by
	        # render(), so screen and draw_options start out unset.
	        self.screen = None
	        self.draw_options = None
	        self.screen_width = 600
	        self.screen_height = 400
	        self.clock = pygame.time.Clock()

	        # The RNG has to exist before _initPymunk(), which draws the random
	        # initial state from self.np_random.
	        self.seed()

	        self._initPymunk()

	        # Action Space

	        # force_mag here is 50x force_mag in standard cartpole because
	        # this force is divided by the frames per second when applied by pymunk
	        self.force_mag = 1500.0
	        self.min_action = -1.0
	        self.max_action = 1.0
	        self.action_space = spaces.Box(
	            low=self.min_action,
	            high=self.max_action,
	            shape=(1,)
	        )

	        # Observation Space
	        # Angle at which to fail the episode (12 degrees, in radians)
	        self.theta_threshold_radians = 12 * 2 * math.pi / 360
	        self.x_threshold = 2.4

	        # Angle limit set to 2 * theta_threshold_radians so failing observation
	        # is still within bounds
	        high = np.array([
	            self.x_threshold * 2,
	            np.finfo(np.float32).max,
	            self.theta_threshold_radians * 2,
	            np.finfo(np.float32).max])

	        self.observation_space = spaces.Box(-high, high)

	        # None until an episode has ended; afterwards counts the step()
	        # calls made past the terminal step. reset() clears it again.
	        self.steps_beyond_done = None

	    def _initPymunk(self):
	        """
	        Build a fresh pymunk space holding the track, cart, pole and their
	        constraints, with a small random initial state drawn from
	        self.np_random.
	        """
	        # Simulation space
	        self.space = pymunk.Space()
	        self.space.gravity = (0.0, -980.0)
	        self.space.iterations = 20  # Double default
	        # Only the first two offsets are used (cart position and velocity).
	        # The draw size is left at 4 so seeded runs keep the same RNG stream.
	        rand_offsets = self.np_random.uniform(low=-0.5, high=0.5, size=(4,))

	        # Track
	        track_pos_y = 100
	        # Track outside of view area
	        padding = 400
	        self.track_body, self.track_shape = utils.addTrack(
	            self.screen_width,
	            self.space,
	            track_pos_y,
	            padding
	        )

	        # Cart
	        cart_width = 60
	        cart_height = 30
	        cart_mass = 1.0
	        cart_x = (self.screen_width / 2) + rand_offsets[0]
	        self.cart_body, self.cart_shape = utils.addCart(
	            self.space,
	            cart_width,
	            cart_height,
	            cart_mass,
	            cart_x,
	            track_pos_y
	        )
	        self.cart_body.velocity = Vec2d(rand_offsets[1], 0.0)

	        # Pole
	        pole_length = 110
	        pole_mass = 0.1
	        self.pole_body, self.pole_shape = utils.addPole(
	            self.screen_width,
	            self.space,
	            pole_length,
	            pole_mass,
	            track_pos_y,
	            cart_height
	        )
	        self.pole_body.angle = self.np_random.uniform(low=-0.05, high=0.05)
	        self.pole_body.angular_velocity = self.np_random.uniform(
	            low=-0.05,
	            high=0.05
	        )

	        # Constraints
	        self.constraints = utils.addConstraints(
	            self.space,
	            self.cart_shape,
	            self.track_shape,
	            self.pole_shape
	        )

	    def seed(self, seed=None):
	        """
	        (Re)create self.np_random from `seed` and return the seed actually
	        used, wrapped in a list.
	        """
	        self.np_random, seed = seeding.np_random(seed)
	        return [seed]

	    def _pole_angle(self):
	        """Return the pole angle wrapped into the range [-pi, pi)."""
	        tau = math.pi * 2
	        theta = self.pole_body.angle % tau
	        if theta >= math.pi:
	            theta = theta - tau
	        return theta

	    def _observation(self):
	        """Return the current state in the same layout step() reports."""
	        return (
	            self.cart_body.position[0],
	            self.cart_body.velocity[0],
	            self._pole_angle(),
	            self.pole_body.angular_velocity
	        )

	    def step(self, action):
	        """
	        - Take action
	        - Step the physics of the world
	        - Check for 'done' conditions
	        - Return reward as appropriate

	        `action` may be the shape-(1,) array described by action_space or a
	        bare number; only its first component is used.

	        Returns the tuple (obs, reward, done, info), where obs is
	        (cart x, cart x velocity, pole angle, pole angular velocity) and
	        info is an empty dict.

	        The cart position and pole angle, both for the 'done' check and in
	        the returned observation, are sampled before the physics step; the
	        two velocities are sampled after it.

	        Note: render() must be called at least once before
	        this method is called otherwise pymunk breaks.
	        # e.g. OverflowError: Python int too large to convert to C long
	        """
	        # Scale first and extract afterwards so an array action keeps the
	        # precision of the original array multiplication.
	        force_x = float(np.ravel(self.force_mag * np.asarray(action))[0])
	        # The push is purely horizontal; spell out the zero y component
	        # instead of handing pymunk a one-component vector.
	        self.cart_body.apply_force_at_local_point(
	            (force_x, 0.0),
	            self.cart_body.center_of_gravity
	        )
	        theta = self._pole_angle()
	        x = self.cart_body.position[0]
	        # Out of bounds failure
	        done = x < 0.0 or x > self.screen_width
	        # Angular failure
	        if not done:
	            done = theta < -self.theta_threshold_radians \
	                   or theta > self.theta_threshold_radians
	        done = bool(done)

	        if not done:
	            reward = 1.0
	        elif self.steps_beyond_done is None:
	            # Pole just fell!
	            self.steps_beyond_done = 0
	            reward = 1.0
	        else:
	            if self.steps_beyond_done == 0:
	                logger.warn("""
	You are calling 'step()' even though this environment has already returned
	done = True. You should always call 'reset()' once you receive 'done = True'
	Any further steps are undefined behavior.
	""")
	            self.steps_beyond_done += 1
	            reward = 0.0

	        self.space.step(1 / 50.0)

	        cart_x_velocity = self.cart_body.velocity[0]
	        pole_ang_velocity = self.pole_body.angular_velocity
	        obs = (
	            x,
	            cart_x_velocity,
	            theta,
	            pole_ang_velocity
	        )

	        return obs, reward, done, {}

	    def render(self, mode='human'):
	        """
	        Draw the current simulation state into the pygame window, creating
	        the window on the first call.

	        Returns None for mode='human'. For mode='rgb_array' the drawn frame
	        is also returned as a (height, width, 3) array.
	        """
	        if self.screen is None:
	            print('Setting up screen')
	            pygame.init()
	            self.screen = pygame.display.set_mode(
	                (self.screen_width, self.screen_height)
	            )
	            pygame.display.set_caption("pymunk_cartpole.py")
	            # Debug draw setup (called in render())
	            self.draw_options = pymunk.pygame_util.DrawOptions(self.screen)
	            self.draw_options.flags = 3

	        utils.handlePygameEvents()

	        # Redraw all objects
	        self.screen.fill((255, 255, 255))
	        self.space.debug_draw(self.draw_options)
	        pygame.display.flip()
	        self.clock.tick(50)

	        if mode == 'rgb_array':
	            # surfarray is indexed [x][y][rgb]; swap the first two axes so
	            # rows come first.
	            return np.transpose(
	                pygame.surfarray.array3d(self.screen),
	                (1, 0, 2)
	            )

	    def reset(self):
	        """
	        Start a new episode: rebuild the simulation with a fresh random
	        initial state and return the initial observation.
	        """
	        # Rebinding self.space inside _initPymunk() drops the old space.
	        self._initPymunk()
	        # Without this the terminal step of every later episode would be
	        # treated as a step taken after 'done' (warning + zero reward).
	        self.steps_beyond_done = None
	        return self._observation()