Last active
October 16, 2018 04:00
-
-
Save jeasinema/3477b1327b1225789e946911ef6edd91 to your computer and use it in GitHub Desktop.
Wrap DeepMind dm_control into OpenAI gym
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding:UTF-8 -*-
# File Name : dm_control_wrapper.py
# Creation Date : 09-10-2018
# Created By : Jeasine Ma [jeasinema[at]gmail[dot]com]
import glfw | |
# Initialise GLFW before dm_control is imported.
# NOTE(review): presumably a workaround for a rendering-backend init-order
# issue -- confirm whether it is still needed with the installed dm_control.
try:
    glfw.init()
except Exception:
    # Best effort: a failed init (e.g. on a headless machine) must not prevent
    # the module from loading. Unlike a bare ``except:``, this still lets
    # KeyboardInterrupt and SystemExit propagate.
    pass
from dm_control import suite | |
from dm_control.suite import humanoid_CMU | |
import gym.spaces as spaces | |
from gym.envs.registration import EnvSpec | |
import numpy as np | |
import cv2 | |
class Env_DM_Control(object):
    """Expose a DeepMind ``dm_control`` task through a gym-style interface.

    Observations, which dm_control returns as a mapping of named arrays, are
    flattened and concatenated into a single 1-D vector.

    Args:
        name: Either ``'Humanoid_CMU'`` or a ``'<domain>+<task>'`` string that
            is split on ``'+'`` and handed to ``suite.load``.
        img_size: Height and width (in pixels) of frames returned by
            :meth:`render`.
        camera_id: Camera passed to ``physics.render``. ``None`` or an empty
            string selects the renderer's default camera.
        max_step: Maximum number of steps per episode. A negative value
            disables the limit.
    """

    def __init__(self, name, img_size=84, camera_id='side', max_step=-1):
        self.env_name = name
        self.img_size = img_size
        self.camera_id = camera_id
        self.max_step = max_step
        self.env = self._make_env()

        action_spec = self.env.action_spec()
        # NOTE(review): only the first element of the bounds is used, i.e. all
        # actuators are assumed to share the same limits -- confirm per task.
        self.control_min = action_spec.minimum[0]
        self.control_max = action_spec.maximum[0]
        self.control_shape = action_spec.shape
        self._action_space = spaces.Box(
            self.control_min, self.control_max, self.control_shape)

        # Count every element of every observation entry; np.prod(()) == 1
        # covers scalar entries, and multi-dimensional entries are fully
        # counted (they are ravelled in _flatten_observation).
        total_size = sum(
            int(np.prod(spec.shape))
            for spec in self.env.observation_spec().values())
        self._observation_space = spaces.Box(-np.inf, np.inf, (total_size, ))

        self.step_count = 0
        self.reward_range = (-np.inf, np.inf)
        self.metadata = {'render.modes': ['human', 'rgb_array'],
                         'video.frames_per_second': 67}
        # NOTE(review): the spec id and step limits are hard-coded and do not
        # depend on ``name``; ``timestep_limit`` may not be accepted by every
        # gym version -- confirm against the installed gym.
        self.spec = EnvSpec('Humanoid-v2', max_episode_steps=1000,
                            timestep_limit=1000)

    def _make_env(self, seed=None):
        """Build the underlying dm_control environment, optionally seeded."""
        if self.env_name == 'Humanoid_CMU':
            if seed is None:
                return humanoid_CMU.run()
            return humanoid_CMU.run(random=seed)

        parts = self.env_name.split('+')
        if len(parts) != 2:
            raise ValueError(
                "environment name must be 'Humanoid_CMU' or "
                "'<domain>+<task>', got {!r}".format(self.env_name))
        domain, task = parts
        if seed is None:
            return suite.load(domain_name=domain, task_name=task)
        return suite.load(domain_name=domain, task_name=task,
                          task_kwargs={'random': seed})

    @staticmethod
    def _flatten_observation(observation):
        """Concatenate all observation entries into one 1-D array."""
        # np.ravel turns scalars into length-1 arrays and flattens anything
        # with more than one dimension, so entries of any rank concatenate.
        return np.concatenate([np.ravel(value)
                               for value in observation.values()])

    @property
    def action_space(self):
        """The ``Box`` describing valid actions."""
        return self._action_space

    @property
    def observation_space(self):
        """The unbounded ``Box`` describing the flattened observation."""
        return self._observation_space

    @property
    def physics(self):
        """The ``physics`` object of the wrapped environment."""
        return self.env.physics

    def reset(self):
        """Start a new episode and return the flattened initial observation."""
        # Clear the counter here too, so an episode abandoned before ``done``
        # does not eat into the next episode's step budget.
        self.step_count = 0
        return self._flatten_observation(self.env.reset().observation)

    def step(self, action):
        """Advance the environment by one step.

        Returns:
            A ``(state, reward, done, info)`` tuple. ``state`` is the flattened
            observation, ``reward`` is ``0.0`` when the environment reports no
            reward, ``done`` is true when the episode ended or ``max_step``
            steps were taken, and ``info`` is an empty dict.
        """
        ret = self.env.step(action)
        state = self._flatten_observation(ret.observation)
        # The wrapped environment may report ``None`` as the reward (e.g. on
        # the first time step of an episode); callers expect a number.
        reward = 0.0 if ret.reward is None else ret.reward

        # Count this step before testing the limit so that an episode lasts
        # at most ``max_step`` steps rather than ``max_step + 1``.
        self.step_count += 1
        hit_limit = 0 <= self.max_step <= self.step_count
        done = bool(ret.last() or hit_limit)
        if done:
            self.step_count = 0
        return state, reward, done, {}

    def render(self, mode='rgb_array'):
        """Render the current frame as an ``img_size`` x ``img_size`` image.

        Args:
            mode: Accepted for compatibility with gym-style callers; the
                rendered frame is returned whatever its value.
        """
        height = width = self.img_size
        camera_id = self.camera_id
        # Compare explicitly instead of relying on truthiness: camera id 0 is
        # a valid camera and must not fall back to the default one.
        if camera_id is None or camera_id == '':
            return self.env.physics.render(height, width)
        return self.env.physics.render(height, width, camera_id=camera_id)

    def seed(self, seed=None):
        """Re-create the wrapped environment with the given random seed.

        The previous environment is discarded, so call :meth:`reset` before
        stepping again.

        Returns:
            A one-element list holding the seed that was used.
        """
        self.env = self._make_env(seed)
        self.step_count = 0
        return [seed]

    def close(self):
        """No-op; present for interface compatibility."""
        pass
if __name__ == '__main__':
    # Manual smoke test: build an environment, print its spaces, then render
    # frames forever (stop with Ctrl-C).
    # The wrapper splits names on '+', so the name must be '<domain>+<task>';
    # a '/' separator raises ValueError in the constructor.
    env = Env_DM_Control('cartpole+swingup')
    env.reset()
    print(env.action_space)
    print(env.observation_space)
    while True:
        env.render()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment