Skip to content

Instantly share code, notes, and snippets.

View araffin's full-sized avatar

Antonin RAFFIN araffin

View GitHub Profile
import gym
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines import PPO2
# Wrap a single Reacher-v2 instance in a vectorized environment
env = DummyVecEnv([lambda: gym.make("Reacher-v2")])
# Automatically normalize the observations; rewards are left un-normalized
env = VecNormalize(
    env,
    norm_obs=True,
    norm_reward=False,
    clip_obs=10.,
)
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.policies import CnnPolicy
from stable_baselines import PPO2
# make_atari_env is an existing environment generator that creates
# and wraps the Atari environments correctly (here: 8 envs, seed 0)
env = make_atari_env(
    "DemonAttackNoFrameskip-v4",
    num_env=8,
    seed=0,
)
model = PPO2(CnnPolicy, env, verbose=1)
model.learn(total_timesteps=10000)
import imageio
import numpy as np
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import A2C
# Create an A2C model on LunarLander-v2 and train it for 100000 timesteps
model = A2C(MlpPolicy, "LunarLander-v2").learn(total_timesteps=100000)
# Initial observation from the model's environment, plus an empty list
# (presumably filled with rendered frames further down -- not visible here)
obs = model.env.reset()
images = []
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import VecFrameStack
from stable_baselines import ACER
# make_atari_env is an existing environment generator that creates
# and wraps the Atari environments correctly.
# Training is also multiprocessed here: num_env=4 means 4 processes.
env = make_atari_env("PongNoFrameskip-v4", seed=0, num_env=4)
# Frame-stacking with 4 frames
import os
import gym
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines.bench import Monitor
from stable_baselines.results_plotter import load_results, ts2xy
from stable_baselines import DDPG
@araffin
araffin / one_line_rl.py
Last active September 18, 2018 09:29
Train an RL agent in one line of code!
from stable_baselines import PPO2
# Define and train a model in a single expression
trained_model = (
    PPO2("MlpPolicy", "CartPole-v1")
    .learn(total_timesteps=10000)
)
# The gym env can then be accessed with trained_model.get_env()
from stable_baselines.common.policies import FeedForwardPolicy
from stable_baselines import A2C
# Custom MLP policy of three layers of size 128 each
class CustomPolicy(FeedForwardPolicy):
    """Custom MLP policy of three layers of size 128 each."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to FeedForwardPolicy with a fixed architecture.

        ``layers`` and ``feature_extraction`` are set here, so passing either
        of them through ``kwargs`` raises ``TypeError`` (duplicate keyword).
        """
        super().__init__(
            *args,
            **kwargs,
            layers=[128, 128, 128],
            feature_extraction="mlp",
        )
import gym
import numpy as np
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.common import set_global_seeds
from stable_baselines import ACKTR
def make_env(env_id, rank, seed=0):
"""
@araffin
araffin / a2c_lunar.py
Last active October 16, 2022 13:53
Training, Saving and Loading an A2C agent
import gym
from stable_baselines import A2C
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
# Create and wrap the environment.
# The raw env is built inside the factory rather than captured with
# `lambda: env`: that closure would look `env` up lazily, and `env` is
# rebound to the vectorized wrapper by this very assignment.
env = DummyVecEnv([lambda: gym.make('LunarLander-v2')])
@araffin
araffin / demo_baselines.py
Last active April 10, 2020 19:13
Getting Started With Stable Baselines
# from https://github.com/hill-a/stable-baselines
import gym
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import PPO2
# CartPole-v1 environment and a PPO2 model using the MLP policy
env = gym.make("CartPole-v1")
model = PPO2(policy=MlpPolicy, env=env, verbose=1)
# Train the agent