Skip to content

Instantly share code, notes, and snippets.

View davidADSP's full-sized avatar

David Foster davidADSP

View GitHub Profile
@davidADSP
davidADSP / train.py
Last active January 23, 2021 20:07
Training a PPO model on Pendulum
# Gist excerpt: set up a PPO1 trainer on the classic Pendulum swing-up task.
# NOTE(review): the excerpt is truncated — the evaluation-env setup promised
# by the trailing comment (and the EvalCallback import) is below the cut.
import gym
from stable_baselines import PPO1
from stable_baselines.common.policies import MlpPolicy
# Imported for periodic evaluation during training; used in the elided part.
from stable_baselines.common.callbacks import EvalCallback
env = gym.make('Pendulum-v0')  # continuous-action control task
model = PPO1(MlpPolicy, env)   # PPO1 with the default MLP policy
# Separate evaluation env
@davidADSP
davidADSP / selfplay.py
Last active January 23, 2021 22:04
reset wrapper for self-play environment
class SelfPlayEnv(env):
    # ...
    def reset(self):
        """Reset the underlying environment and re-select opponents.

        After the base reset, opponents are (re)assigned via
        ``setup_opponents()``; if the learning agent is not the first
        player to move, the opponents' turns are played out with
        ``continue_game()`` so the agent always observes the env on its
        own turn.

        NOTE(review): the observation from the base ``reset()`` is
        discarded here and nothing is returned — gym's reset contract
        normally returns the initial observation. Presumably the elided
        code (``# ...``) handles that; confirm against the full source.
        """
        super(SelfPlayEnv, self).reset()
        self.setup_opponents()
        # Let opponents act first until it is the learning agent's turn.
        if self.current_player_num != self.agent_player_num:
            self.continue_game()
@davidADSP
davidADSP / selfplay.py
Created January 23, 2021 22:05
the continue_game method of the SelfPlayEnv class
class SelfPlayEnv(env):
    # ...
    def continue_game(self):
        """Step the environment on behalf of opponent agents until it is
        the learning agent's turn to act.

        Each opponent move is sampled (``choose_best_action=False``)
        rather than taken greedily, and invalid actions are not masked
        here — presumably masking is handled elsewhere; verify against
        the full source. Truncated gist excerpt: lines after the debug
        logging (e.g. handling ``done``) are below the cut.
        """
        while self.current_player_num != self.agent_player_num:
            self.render()
            # Opponent selects a (stochastic, unmasked) action for the
            # current state of this env.
            action = self.current_agent.choose_action(self, choose_best_action = False, mask_invalid_actions = False)
            # Advance the underlying env with the opponent's action; the
            # info dict is discarded.
            observation, reward, done, _ = super(SelfPlayEnv, self).step(action)
            logger.debug(f'Rewards: {reward}')
            logger.debug(f'Done: {done}')
@davidADSP
davidADSP / selfplay.py
Created January 23, 2021 22:06
The step method of the SelfPlayEnv class
class SelfPlayEnv(env):
    # ...
    def step(self, action):
        """Apply the learning agent's action to the underlying env.

        NOTE(review): truncated gist excerpt — it ends at the debug
        logging with no return statement visible, while gym's step
        contract requires returning ``(observation, reward, done, info)``.
        The continuation (likely including opponent turns and the return)
        is below the cut; confirm against the full source.
        """
        self.render()
        # Forward the agent's action to the base env; info dict discarded.
        observation, reward, done, _ = super(SelfPlayEnv, self).step(action)
        logger.debug(f'Action played by agent: {action}')
        logger.debug(f'Rewards: {reward}')
        logger.debug(f'Done: {done}')