Antonin RAFFIN (araffin) - GitHub gists
from stable_baselines.common.policies import FeedForwardPolicy
from stable_baselines import A2C

# Custom MLP policy of three layers of size 128 each
class CustomPolicy(FeedForwardPolicy):
    def __init__(self, *args, **kwargs):
        super(CustomPolicy, self).__init__(*args, **kwargs,
                                           layers=[128, 128, 128],
                                           feature_extraction="mlp")
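A usage sketch (not shown in the preview; the environment id and timestep budget are illustrative assumptions): the custom policy class is passed directly to the model constructor.

model = A2C(CustomPolicy, 'CartPole-v1', verbose=1)  # env id is an assumption
model.learn(total_timesteps=10000)  # illustrative budget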
araffin / one_line_rl.py (last active September 18, 2018)
Train a RL agent in one line of code!
from stable_baselines import PPO2
# Define and train a model in one line of code!
trained_model = PPO2('MlpPolicy', 'CartPole-v1').learn(total_timesteps=10000)
# you can then access the gym env using trained_model.get_env()
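As a follow-up sketch (assumed usage, not part of the gist): run the trained model on its own environment.

env = trained_model.get_env()
obs = env.reset()
for _ in range(1000):
    action, _states = trained_model.predict(obs)
    obs, rewards, dones, info = env.step(action)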
import os
import gym
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines.bench import Monitor
from stable_baselines.results_plotter import load_results, ts2xy
from stable_baselines import DDPG
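The preview cuts off after the imports; a minimal sketch of how they typically fit together (the log directory, environment id, and timestep budget are illustrative assumptions):

log_dir = "/tmp/ddpg_monitor/"  # illustrative path
os.makedirs(log_dir, exist_ok=True)

# Monitor logs episode rewards to disk so they can be plotted later
env = Monitor(gym.make('MountainCarContinuous-v0'), log_dir, allow_early_resets=True)
env = DummyVecEnv([lambda: env])

model = DDPG('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=10000)  # illustrative budget

# Load the Monitor logs and plot the learning curve
x, y = ts2xy(load_results(log_dir), 'timesteps')
plt.plot(x, y)
plt.xlabel('Timesteps')
plt.ylabel('Episode reward')
plt.show()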
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import VecFrameStack
from stable_baselines import ACER
# There already exists an environment generator
# that will make and wrap atari environments correctly.
# Here we are also multiprocessing training (num_env=4 => 4 processes)
env = make_atari_env('PongNoFrameskip-v4', num_env=4, seed=0)
# Frame-stacking with 4 frames
env = VecFrameStack(env, n_stack=4)

model = ACER(CnnPolicy, env, verbose=1)
model.learn(total_timesteps=25000)  # illustrative budget
import imageio
import numpy as np
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import A2C
model = A2C(MlpPolicy, "LunarLander-v2").learn(100000)
images = []
obs = model.env.reset()
img = model.env.render(mode='rgb_array')
for _ in range(350):
    images.append(img)
    action, _ = model.predict(obs)
    obs, _, _, _ = model.env.step(action)
    img = model.env.render(mode='rgb_array')
imageio.mimsave('lander_a2c.gif', images[::2], fps=29)  # output filename is illustrative
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.policies import CnnPolicy
from stable_baselines import PPO2
# There already exists an environment generator
# that will make and wrap atari environments correctly
env = make_atari_env('DemonAttackNoFrameskip-v4', num_env=8, seed=0)
model = PPO2(CnnPolicy, env, verbose=1)
model.learn(total_timesteps=10000)
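A possible follow-up sketch (the filename is an illustrative assumption): save the trained model and reload it later.

model.save("ppo2_demonattack")  # illustrative filename
del model
model = PPO2.load("ppo2_demonattack")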
import gym
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines import PPO2
env = DummyVecEnv([lambda: gym.make("Reacher-v2")])
# Automatically normalize the input features
env = VecNormalize(env, norm_obs=True, norm_reward=False,
                   clip_obs=10.)
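The preview stops at the wrapper; training then proceeds as usual on the normalized env (the timestep budget is an illustrative assumption):

model = PPO2(MlpPolicy, env)
model.learn(total_timesteps=2000)  # illustrative budget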
araffin / launch_tensorboard.sh (created September 18, 2018)
Stable Baselines - Tensorboard integration for RL
tensorboard --logdir /tmp/a2c_cartpole_tensorboard/
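For this logdir to contain anything, the model must be created with the tensorboard_log argument; a minimal sketch (env id and timestep budget are illustrative):

from stable_baselines import A2C

model = A2C('MlpPolicy', 'CartPole-v1', verbose=1,
            tensorboard_log="/tmp/a2c_cartpole_tensorboard/")
model.learn(total_timesteps=10000)  # illustrative budget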
import pytest
import numpy as np
from stable_baselines import A2C, ACER, ACKTR, DQN, DDPG, PPO1, PPO2, TRPO
from stable_baselines.common import set_global_seeds
MODEL_LIST_DISCRETE = [
    A2C,
    ACER,
    ACKTR,
    DQN,
    PPO1,
    PPO2,
    TRPO,  # DDPG is left out: it only supports continuous action spaces
]
araffin / RL_CMAES.py (last active April 19, 2021)
Mixing Reinforcement Learning (RL) and Evolution Strategy (ES) using Stable-Baselines
import gym
import numpy as np
import cma
from collections import OrderedDict
from stable_baselines import A2C
def flatten(params):
    """
    Flatten an OrderedDict of parameter arrays into a single 1D vector,
    so that CMA-ES can treat the policy as a flat search space.

    :param params: (OrderedDict) model parameters
    :return: (np.ndarray) flattened parameters
    """
    return np.concatenate([v.flatten() for v in params.values()])