This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import gym | |
from gym.spaces import Discrete, Box | |
import numpy as np | |
import ray | |
from ray import tune | |
from ray.tune import grid_search | |
from ray.rllib.utils.framework import try_import_tf, try_import_torch | |
from ray.rllib.utils.test_utils import check_learning_achieved |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import os | |
import random | |
import numpy as np | |
import ray | |
FAST = "DRY_RUN" in os.environ | |
if FAST: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Idempotent actor API example.
1. Actors go from PENDING -> CREATED -> [RECONSTRUCTING -> CREATED] -> DEAD.
2. A single client may issue multiple updates; it is important that these updates are not re-ordered.
   This can be handled by making the actor update calls idempotent.
Non-idempotent:
    def AsyncUpdate(actor_id, actor_state)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MaskingLayerRNNmodel(TFModelV2): | |
def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kw): | |
super(MaskingLayerRNNmodel, self).__init__(obs_space, action_space, num_outputs, model_config, name, **kw) | |
self.initialize_lstm_with_prev_state = model_config['custom_options']['initialize_lstm_with_prev_state'] | |
self.input_layer = tf.keras.layers.Input( | |
shape=(None, obs_space.shape[0]), | |
name='inputLayer') | |
self.state_in_c = tf.keras.layers.Input( | |
shape=(model_config['lstm_cell_size']), | |
name='c') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MaskingLayerRNNmodel(TFModelV2): | |
def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kw): | |
super(MaskingLayerRNNmodel, self).__init__(obs_space, action_space, num_outputs, model_config, name, **kw) | |
self.initialize_lstm_with_prev_state = model_config['custom_options']['initialize_lstm_with_prev_state'] | |
self.input_layer = tf.keras.layers.Input( | |
shape=(None, obs_space.shape[0]), | |
name='inputLayer') | |
self.state_in_c = tf.keras.layers.Input( | |
shape=(model_config['lstm_cell_size']), | |
name='c') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import logging | |
from ray.rllib.agents import with_common_config | |
from ray.rllib.agents.trainer_template import build_trainer | |
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph | |
from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@ray.remote
class MyEnvActor:
    """Illustrative Ray actor exposing a gym-style environment interface.

    NOTE(review): this is example pseudocode — ``obs``, ``rew``, ``done``,
    and ``info`` are placeholders and are not defined in this fragment;
    a real implementation would produce them from a wrapped environment.
    """

    def reset(self):
        # Return the same 4-tuple shape as step() so callers can treat
        # reset and step uniformly; all but obs are dummy values here.
        return obs, 0, False, {}  # dummy vals for all but obs

    def step(self, action):
        ...
        return obs, rew, done, info
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Example of a custom gym environment. Run this for a demo.""" | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import numpy as np | |
import gym | |
from gym.spaces import Discrete, Box, Tuple | |
from gym.envs.registration import EnvSpec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
First, install Ray: http://ray.readthedocs.io/en/latest/installation.html
For Ape-X support, you'll want to install the latest version.

Run APEX in local mode with 4 workers (to use a GPU, add --resources='{"gpu": 1}'):
    $ cd ray/python/ray/rllib
    $ ./train.py --env=PongNoFrameskip-v4 --run=APEX --config='{"num_workers": 4, "timesteps_per_iteration": 5000}'

To run on a cluster with 1 GPU + 32 workers, see the cluster setup
instructions at http://ray.readthedocs.io/en/latest/autoscaling.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install Ray
$ pip install -U ray[rllib]

# Go to the rllib scripts directory
$ git clone git@github.com:ray-project/ray.git && cd ray/python/ray/rllib

# Run in local mode with a reduced number of workers (to use a GPU, add --resources='{"gpu": 1}')
$ ./train.py --env=PongNoFrameskip-v4 --run=APEX --config='{"num_workers": 4, "timesteps_per_iteration": 5000}'

# Run on a cluster with 1 GPU + 32 workers