Skip to content

Instantly share code, notes, and snippets.

@llSourcell
Created December 18, 2020 15:41
Show Gist options
  • Save llSourcell/7c0d06ee611e62173a8af319d5e4c009 to your computer and use it in GitHub Desktop.
Save llSourcell/7c0d06ee611e62173a8af319d5e4c009 to your computer and use it in GitHub Desktop.
Replace 'manual_control.py' in the following repo with this version: https://github.com/maximecb/gym-miniworld
#!/usr/bin/env python3
"""
This script allows you to manually control the simulator
using the keyboard arrows.
"""
import sys
import argparse
import pyglet
import math
from pyglet.window import key
from pyglet import clock
import numpy as np
import gym
import gym_miniworld
# Command-line options for the manual-control / agent demo.
parser = argparse.ArgumentParser()
parser.add_argument('--env-name', default='MiniWorld-Hallway-v0')
parser.add_argument('--domain-rand', action='store_true', help='enable domain randomization')
parser.add_argument('--no-time-limit', action='store_true', help='ignore time step limits')
# '--top-view' matches the hyphenated style of the other flags; the original
# '--top_view' spelling is kept as an alias so existing invocations still work.
parser.add_argument(
    '--top-view', '--top_view',
    dest='top_view',
    action='store_true',
    help='show the top view instead of the agent view',
)
args = parser.parse_args()

env = gym.make(args.env_name)

if args.no_time_limit:
    # NOTE(review): if gym.make returns a wrapper, this assignment lands on
    # the wrapper rather than on env.unwrapped -- confirm the limit is
    # actually lifted for the underlying environment.
    env.max_episode_steps = math.inf
if args.domain_rand:
    env.domain_rand = True

# View passed to every env.render('pyglet', ...) call in this script.
view_mode = 'top' if args.top_view else 'agent'

env.reset()

# Create the display window
env.render('pyglet', view=view_mode)
# You can replace this learning-agent function
# with your own custom RL algorithm.
def learningAgent(num_episodes=1000, max_steps=1000):
    """
    Placeholder agent loop; replace it with a real RL algorithm.

    While no rewarded step has been recorded the agent samples random
    actions from env.action_space; once at least one has been recorded
    it always picks env.actions.move_forward.

    num_episodes: number of episodes to run (default 1000).
    max_steps: maximum number of steps per episode (default 1000).

    Returns the list of (observation, action, reward) tuples for every
    step whose reward was truthy.
    """
    print('hello world')
    good_episodes = []
    for _ in range(num_episodes):
        for t in range(max_steps):
            env.render()
            if not good_episodes:
                action = env.action_space.sample()
            else:
                action = env.actions.move_forward
            observation, reward, done, _info = step(action)
            if reward:
                good_episodes.append((observation, action, reward))
            print(observation)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
    return good_episodes
def sample_policy(observation):
    """
    Reference policy that picks an action from the first observation value.

    observation must unpack into exactly three values; only the first
    one is used.

    Returns 0 when the first value is >= 20, otherwise 1.
    """
    # add a policy here, this is just a reference.
    value1, _, _ = observation
    return 0 if value1 >= 20 else 1
def step(action):
    """
    Apply one action to the environment, log the outcome and redraw.

    Prints the step counter and action name, prints the reward when it
    is positive, and resets the environment when the episode is done.
    The window is re-rendered after every step.

    Returns the (obs, reward, done, info) tuple produced by env.step.
    """
    print('step {}/{}: {}'.format(env.step_count+1, env.max_episode_steps, env.actions(action).name))

    obs, reward, done, info = env.step(action)

    if reward > 0:
        print('reward={:.2f}'.format(reward))

    if done:
        print('done!')
        env.reset()

    env.render('pyglet', view=view_mode)
    return obs, reward, done, info
@env.unwrapped.window.event
def on_key_press(symbol, modifiers):
    """
    This handler processes keyboard commands that
    control the simulation

    BACKSPACE or SLASH resets the environment, ESCAPE closes it and
    exits the process, and the remaining bound keys each execute one
    environment action through step(). Unbound keys are ignored.
    """
    if symbol in (key.BACKSPACE, key.SLASH):
        print('RESET')
        env.reset()
        env.render('pyglet', view=view_mode)
        return

    if symbol == key.ESCAPE:
        env.close()
        sys.exit(0)

    # Key -> name of the env.actions member to execute. Names are resolved
    # with getattr only for the key actually pressed.
    action_names = {
        key.UP: 'move_forward',
        key.DOWN: 'move_back',
        key.LEFT: 'turn_left',
        key.RIGHT: 'turn_right',
        key.PAGEUP: 'pickup',
        key.P: 'pickup',
        key.PAGEDOWN: 'drop',
        key.D: 'drop',
        key.ENTER: 'done',
    }
    action_name = action_names.get(symbol)
    if action_name is not None:
        step(getattr(env.actions, action_name))
@env.unwrapped.window.event
def on_key_release(symbol, modifiers):
    """
    Key-release handler registered on the window; it does nothing.
    """
    pass
@env.unwrapped.window.event
def on_draw():
    """
    Redraw the environment using the selected view mode.
    """
    env.render('pyglet', view=view_mode)
# Run the placeholder agent.
# NOTE(review): indentation was lost in this copy of the file; this call is
# assumed to be a top-level statement (executed once, before the event loop
# starts) rather than part of on_draw -- confirm against the original gist.
learningAgent()
@env.unwrapped.window.event
def on_close():
    """
    Stop the pyglet event loop when the window is closed.
    """
    pyglet.app.exit()
# Enter main event loop
pyglet.app.run()
# Reached once the event loop has returned: release the environment.
env.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment