alexlimh · May 14, 2020 22:45
diff --git a/threedfourroom.py b/threedfourroom.py
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import collections

 from dm_control import mujoco
 from dm_control.rl import control
 from dm_control.suite import base
 from dm_control.suite import common
 from dm_control.suite.utils import randomizers
 from dm_control.utils import rewards
 from dm_control.utils import containers
 from dm_control.utils import xml_tools
 from dm_control.utils import io as resources

 import numpy as np
 from lxml import etree
 from six.moves import range


 _DEFAULT_TIME_LIMIT = 100
 SUITE = containers.TaggedTasks()

 def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return _make_model(), common.ASSETS

 @SUITE.add()
 def smooth(
    time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = FourRoom(random=random, sparse=False)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task,
      time_limit=time_limit,
      **environment_kwargs)

 @SUITE.add()
 def sparse(
    time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = FourRoom(random=random, sparse=True)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task,
      time_limit=time_limit,
      **environment_kwargs)

 def _make_model():
  """Generates an xml string defining a cart with `n_poles` bodies."""
  xml_string = common.read_model('threedfourroom.xml')
  mjcf = etree.fromstring(xml_string)
  return etree.tostring(mjcf, pretty_print=True)


 class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Reacher domain."""

  def tool_to_target(self):
    """Returns the vector from target to finger in global coordinates."""
    target = self.named.data.geom_xpos['target', :2]
    tool = self.named.data.geom_xpos['toolgeom', :2]
    return target - tool

  def tool_to_target_dist(self):
    """Returns the signed distance between the finger and target surface."""
    return np.linalg.norm(self.tool_to_target())

  def camera_angle(self):
    tool = self.named.data.geom_xpos['toolgeom', :2]
    cam_target = self.named.data.geom_xpos['cam_target', :2]
    delta = cam_target - tool
    angle = -np.arctan2(delta[0], delta[1])
    return angle


 class FourRoom(base.Task):
  """A reacher `Task` to reach the target."""

  def __init__(self, random=None, sparse=False):
    """Initialize an instance of `Reacher`.

    Args:
      target_size: A `float`, tolerance to determine whether finger reached the
          target.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    super(FourRoom, self).__init__(random=random)
    self._sparse = sparse

  def initialize_episode(self, physics, difficulty=None):
    """Sets the state of the environment at the start of each episode."""
    #randomizers.randomize_limited_and_rotational_joints(physics, self.random)
    #w1, w2 = 0, 0
    #physics.named.model.geom_pos['wall_1a', 'y'] = w1 + 0.6
    #physics.named.model.geom_pos['wall_1b', 'y'] = w1 - 0.4
    #physics.named.model.geom_pos['wall_2a', 'y'] = w2 + 0.3
    #physics.named.model.geom_pos['wall_2b', 'y'] = w2 - 0.7
    #physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-0.25, 0.25)
    super(FourRoom, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of the state and the target position."""
    obs = collections.OrderedDict()
    obs['position'] = self.named.data.geom_xpos['toolgeom', :2] # physics.position()
    obs['to_target'] = physics.tool_to_target()
    obs['velocity'] = physics.velocity()
    obs['camera_angle'] = physics.camera_angle()
    return obs

  def get_reward(self, physics):
    if self._sparse:
      radii = physics.named.model.geom_size[['target', 'toolgeom'], 0].sum()
      return rewards.tolerance(physics.tool_to_target_dist(), (0, radii))
    else:
      max_distance = 0.67  # Estimated empirically.
      cur_distance = physics.tool_to_target_dist()
      return 1 - np.clip(cur_distance / max_distance, 0, 1)
diff --git a/threedfourroom.xml b/threedfourroom.xml
 <mujoco model="two-link planar reacher">
  <include file="./common/skybox.xml"/>
  <include file="./common/visual.xml"/>
  <include file="./common/materials.xml"/>
  <option timestep="0.1" />

  <default>
    <general ctrllimited="true"/>
    <default class="tool">
      <joint type="hinge" limited="false" range="-1 1" damping="2"/>
      <geom material="self"/>
    </default>
  </default>

  <worldbody>
    <light name="light" pos="0 0 1"/>
    <camera name="fixed" pos="0 0 1.5" quat="1 0 0 0"/>
    <geom name="ground" rgba="0 0 0 1" type="plane" pos="0 0 0" size="1 1 1" material="grid"/>
    <geom name="wall_n" type="box" pos=" 0    0.3  0.02" zaxis=" 0 -1  0" size="0.3 0.05 0.015" material="decoration"/>
    <geom name="wall_e" type="box" pos=" 0.3  0    0.02" zaxis="-1  0  0" size="0.05 0.31 0.015" material="decoration"/>
    <geom name="wall_s" type="box" pos=" 0   -0.3  0.02" zaxis=" 0  -1  0" size="0.3 0.05 0.015" material="decoration"/>
    <geom name="wall_w" type="box" pos="-0.3  0    0.02" zaxis=" -1  0  0" size="0.05 0.31 0.015" material="decoration"/>

    <geom name="wall_1a" type="box" pos="0 0.27 .02" zaxis="-1 0 0"  size=".05 .03 .015" material="decoration" />
    <geom name="wall_1b" type="box" pos="0.27 0 .02" zaxis="0 1 0"  size=".03 .05 .015" material="decoration" />
    <geom name="wall_2a" type="box" pos="0 -0.27 .02" zaxis="-1 0 0"  size=".05 .03 .015" material="decoration" />
    <geom name="wall_2b" type="box" pos="-0.27 0 .02" zaxis="0 1 0"  size=".03 .05 .015" material="decoration" />
    <geom name="wall_3a" type="box" pos=" 0 0 .02" zaxis="-1 0 0"  size=".05 .15 .015" material="decoration" />
    <geom name="wall_3b" type="box" pos=" 0 0 .02" zaxis="0 1 0"  size=".15 .05 .015" material="decoration" />
    
    <body name="cam_target" pos="0 0 0.02">
              <joint name="cam_target_yaw" pos="0 0 0" axis="0 0 1"/>
              <geom name="cam_target" rgba="0 0 0 0" type="box" size="0.018 0.018 0.02" pos="-0.55 0 0"/>
    </body>
    
    <body name="torso" pos="-0.25 -0.25 0.02" childclass="tool" >
      <geom name="toolgeom" rgba="1 1 1 1" type="box" size="0.02 0.02 0.02" pos="0 0 0" />
      <camera name="eyes" pos="0 0 0" xyaxes="1 0 0 0 0 1" mode="targetbodycom" target="cam_target"/>

      <joint name="tool_x" type="slide" pos="0 0 0" axis="1 0 0" range="-1 1" damping="0.3" armature='0.0'/>
      <joint name="tool_y" type="slide" pos="0 0 0" axis="0 1 0" range="-1 1" damping="0.3" armature='0.0'/>

    </body>
    <geom name="target" pos="0.25 0.25 0.03" material="target" type="sphere" size="0.03" contype="0" conaffinity="0"/>
  </worldbody>

  <actuator>
    <motor name="x" joint="tool_x" ctrlrange="-1 1" gear="0.05" ctrllimited="true" />
    <motor name="y" joint="tool_y" ctrlrange="-1 1" gear="0.05" ctrllimited="true" />
    <motor name="cam_target" joint="cam_target_yaw" ctrlrange="-1 1" gear="0.05"/>
  </actuator>
 </mujoco>
	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import collections

	from dm_control import mujoco
	from dm_control.rl import control
	from dm_control.suite import base
	from dm_control.suite import common
	from dm_control.suite.utils import randomizers
	from dm_control.utils import rewards
	from dm_control.utils import containers
	from dm_control.utils import xml_tools
	from dm_control.utils import io as resources

	import numpy as np
	from lxml import etree
	from six.moves import range


	_DEFAULT_TIME_LIMIT = 100
	SUITE = containers.TaggedTasks()

	def get_model_and_assets():
	"""Returns a tuple containing the model XML string and a dict of assets."""
	return _make_model(), common.ASSETS

	@SUITE.add()
	def smooth(
	time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
	physics = Physics.from_xml_string(*get_model_and_assets())
	task = FourRoom(random=random, sparse=False)
	environment_kwargs = environment_kwargs or {}
	return control.Environment(
	physics, task,
	time_limit=time_limit,
	**environment_kwargs)

	@SUITE.add()
	def sparse(
	time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
	physics = Physics.from_xml_string(*get_model_and_assets())
	task = FourRoom(random=random, sparse=True)
	environment_kwargs = environment_kwargs or {}
	return control.Environment(
	physics, task,
	time_limit=time_limit,
	**environment_kwargs)

	def _make_model():
	"""Generates an xml string defining a cart with `n_poles` bodies."""
	xml_string = common.read_model('threedfourroom.xml')
	mjcf = etree.fromstring(xml_string)
	return etree.tostring(mjcf, pretty_print=True)


	class Physics(mujoco.Physics):
	"""Physics simulation with additional features for the Reacher domain."""

	def tool_to_target(self):
	"""Returns the vector from target to finger in global coordinates."""
	target = self.named.data.geom_xpos['target', :2]
	tool = self.named.data.geom_xpos['toolgeom', :2]
	return target - tool

	def tool_to_target_dist(self):
	"""Returns the signed distance between the finger and target surface."""
	return np.linalg.norm(self.tool_to_target())

	def camera_angle(self):
	tool = self.named.data.geom_xpos['toolgeom', :2]
	cam_target = self.named.data.geom_xpos['cam_target', :2]
	delta = cam_target - tool
	angle = -np.arctan2(delta[0], delta[1])
	return angle


	class FourRoom(base.Task):
	"""A reacher `Task` to reach the target."""

	def __init__(self, random=None, sparse=False):
	"""Initialize an instance of `Reacher`.

	Args:
	target_size: A `float`, tolerance to determine whether finger reached the
	target.
	random: Optional, either a `numpy.random.RandomState` instance, an
	integer seed for creating a new `RandomState`, or None to select a seed
	automatically (default).
	"""
	super(FourRoom, self).__init__(random=random)
	self._sparse = sparse

	def initialize_episode(self, physics, difficulty=None):
	"""Sets the state of the environment at the start of each episode."""
	#randomizers.randomize_limited_and_rotational_joints(physics, self.random)
	#w1, w2 = 0, 0
	#physics.named.model.geom_pos['wall_1a', 'y'] = w1 + 0.6
	#physics.named.model.geom_pos['wall_1b', 'y'] = w1 - 0.4
	#physics.named.model.geom_pos['wall_2a', 'y'] = w2 + 0.3
	#physics.named.model.geom_pos['wall_2b', 'y'] = w2 - 0.7
	#physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-0.25, 0.25)
	super(FourRoom, self).initialize_episode(physics)

	def get_observation(self, physics):
	"""Returns an observation of the state and the target position."""
	obs = collections.OrderedDict()
	obs['position'] = self.named.data.geom_xpos['toolgeom', :2] # physics.position()
	obs['to_target'] = physics.tool_to_target()
	obs['velocity'] = physics.velocity()
	obs['camera_angle'] = physics.camera_angle()
	return obs

	def get_reward(self, physics):
	if self._sparse:
	radii = physics.named.model.geom_size[['target', 'toolgeom'], 0].sum()
	return rewards.tolerance(physics.tool_to_target_dist(), (0, radii))
	else:
	max_distance = 0.67 # Estimated empirically.
	cur_distance = physics.tool_to_target_dist()
	return 1 - np.clip(cur_distance / max_distance, 0, 1)
	<mujoco model="two-link planar reacher">
	<include file="./common/skybox.xml"/>
	<include file="./common/visual.xml"/>
	<include file="./common/materials.xml"/>
	<option timestep="0.1" />

	<default>
	<general ctrllimited="true"/>
	<default class="tool">
	<joint type="hinge" limited="false" range="-1 1" damping="2"/>
	<geom material="self"/>
	</default>
	</default>

	<worldbody>
	<light name="light" pos="0 0 1"/>
	<camera name="fixed" pos="0 0 1.5" quat="1 0 0 0"/>
	<geom name="ground" rgba="0 0 0 1" type="plane" pos="0 0 0" size="1 1 1" material="grid"/>
	<geom name="wall_n" type="box" pos=" 0 0.3 0.02" zaxis=" 0 -1 0" size="0.3 0.05 0.015" material="decoration"/>
	<geom name="wall_e" type="box" pos=" 0.3 0 0.02" zaxis="-1 0 0" size="0.05 0.31 0.015" material="decoration"/>
	<geom name="wall_s" type="box" pos=" 0 -0.3 0.02" zaxis=" 0 -1 0" size="0.3 0.05 0.015" material="decoration"/>
	<geom name="wall_w" type="box" pos="-0.3 0 0.02" zaxis=" -1 0 0" size="0.05 0.31 0.015" material="decoration"/>

	<geom name="wall_1a" type="box" pos="0 0.27 .02" zaxis="-1 0 0" size=".05 .03 .015" material="decoration" />
	<geom name="wall_1b" type="box" pos="0.27 0 .02" zaxis="0 1 0" size=".03 .05 .015" material="decoration" />
	<geom name="wall_2a" type="box" pos="0 -0.27 .02" zaxis="-1 0 0" size=".05 .03 .015" material="decoration" />
	<geom name="wall_2b" type="box" pos="-0.27 0 .02" zaxis="0 1 0" size=".03 .05 .015" material="decoration" />
	<geom name="wall_3a" type="box" pos=" 0 0 .02" zaxis="-1 0 0" size=".05 .15 .015" material="decoration" />
	<geom name="wall_3b" type="box" pos=" 0 0 .02" zaxis="0 1 0" size=".15 .05 .015" material="decoration" />

	<body name="cam_target" pos="0 0 0.02">
	<joint name="cam_target_yaw" pos="0 0 0" axis="0 0 1"/>
	<geom name="cam_target" rgba="0 0 0 0" type="box" size="0.018 0.018 0.02" pos="-0.55 0 0"/>
	</body>

	<body name="torso" pos="-0.25 -0.25 0.02" childclass="tool" >
	<geom name="toolgeom" rgba="1 1 1 1" type="box" size="0.02 0.02 0.02" pos="0 0 0" />
	<camera name="eyes" pos="0 0 0" xyaxes="1 0 0 0 0 1" mode="targetbodycom" target="cam_target"/>

	<joint name="tool_x" type="slide" pos="0 0 0" axis="1 0 0" range="-1 1" damping="0.3" armature='0.0'/>
	<joint name="tool_y" type="slide" pos="0 0 0" axis="0 1 0" range="-1 1" damping="0.3" armature='0.0'/>

	</body>
	<geom name="target" pos="0.25 0.25 0.03" material="target" type="sphere" size="0.03" contype="0" conaffinity="0"/>
	</worldbody>

	<actuator>
	<motor name="x" joint="tool_x" ctrlrange="-1 1" gear="0.05" ctrllimited="true" />
	<motor name="y" joint="tool_y" ctrlrange="-1 1" gear="0.05" ctrllimited="true" />
	<motor name="cam_target" joint="cam_target_yaw" ctrlrange="-1 1" gear="0.05"/>
	</actuator>
	</mujoco>