Skip to content

Instantly share code, notes, and snippets.

@heiner
Last active June 18, 2021 10:44
Show Gist options
  • Select an option

  • Save heiner/6bb0b0c8629fce26b628e76afbe7356d to your computer and use it in GitHub Desktop.

Select an option

Save heiner/6bb0b0c8629fce26b628e76afbe7356d to your computer and use it in GitHub Desktop.
class HardCartPole:
    """CartPole-v0 wrapper that pays out reward only when the episode ends.

    Per-step rewards from the wrapped environment are summed internally.
    Every call to ``step`` reports a reward of ``0.0`` except the one on
    which the wrapped environment signals ``done``; that call reports the
    total accumulated since the last ``reset``.
    """

    def __init__(self):
        """Create the wrapped ``CartPole-v0`` environment and zero the tally."""
        self._env = gym.make("CartPole-v0")
        self._reward = 0.0

    def reset(self):
        """Clear the accumulated reward and reset the wrapped environment.

        Returns:
            Whatever the wrapped environment's ``reset`` returns.
        """
        self._reward = 0.0
        initial_observation = self._env.reset()
        return initial_observation

    def step(self, action):
        """Forward ``action`` to the wrapped environment, deferring the reward.

        Args:
            action: Passed unchanged to the wrapped environment's ``step``.

        Returns:
            A tuple ``(obs, reward, done, info)`` where ``obs``, ``done`` and
            ``info`` come straight from the wrapped environment and ``reward``
            is the accumulated total if ``done`` is truthy, else ``0.0``.
        """
        observation, step_reward, finished, extras = self._env.step(action)
        self._reward += step_reward
        # Withhold the running total until the wrapped env ends the episode.
        payout = self._reward if finished else 0.0
        return observation, payout, finished, extras

    def render(self):
        """Render via the wrapped environment and return its result."""
        return self._env.render()
class MarshmallowEnv:
    """Toy delayed-gratification environment.

    Every call to ``step`` advances a step counter. Action ``0`` ends the
    episode on that step; any other action lets it continue until
    ``max_episode_steps`` steps have been taken. The reward is ``0.0`` on
    non-terminal steps and equals the number of steps taken on the terminal
    step, so ending later pays more.

    Observations are length-4 float arrays whose entries all equal
    ``log(steps + 1)`` (all zeros right after ``reset``).
    """

    def __init__(self, max_episode_steps=200):
        """Store the episode length limit and set up a fresh episode.

        Args:
            max_episode_steps: Number of steps after which the episode is
                forced to end.
        """
        self._max_episode_steps = max_episode_steps
        # Initialise episode state here as well as in reset(), so that
        # calling step() on a new instance does not raise AttributeError.
        self._obs = np.zeros(4)
        self._steps = 0

    def reset(self):
        """Start a new episode.

        Returns:
            A new all-zero observation array of length 4.
        """
        self._obs = np.zeros(4)
        self._steps = 0
        # Return a copy: the internal buffer is overwritten by step(), and
        # callers must be able to keep earlier observations unchanged.
        return self._obs.copy()

    def step(self, action):
        """Advance the episode by one step.

        Args:
            action: Anything accepted by ``int()``; ``0`` ends the episode.

        Returns:
            A tuple ``(obs, reward, done, info)``. ``obs`` is a fresh array
            filled with ``log(steps + 1)``, ``reward`` is ``float(steps)`` on
            the terminal step and ``0.0`` otherwise, ``done`` is a bool, and
            ``info`` is an empty dict.
        """
        done = False
        if int(action) == 0:
            done = True
        self._steps += 1
        self._obs[:] = np.log(self._steps + 1)
        if self._steps >= self._max_episode_steps:
            done = True
        reward = float(self._steps) if done else 0.0
        # Copy so that later steps cannot mutate observations already handed
        # out (e.g. ones stored in a rollout or replay buffer).
        return self._obs.copy(), reward, done, {}

    def render(self):
        """Do nothing; this environment has no visual representation."""
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment