OpenAI Gym - Classic Control BM.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"colab": {
"name": "OpenAI Gym - Classic Control BM.ipynb",
"provenance": [],
"include_colab_link": true
}
},
"cells": [
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/analyticsindiamagazine/d9d4d7e10c653aadb45f61a8e1b2efab/openai-gym-classic-control-bm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
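{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook benchmarks four Stable Baselines algorithms (A2C, PPO2, ACER, ACKTR) on the Gym classic-control suite (Pendulum-v0, MountainCar-v0, Acrobot-v1, CartPole-v1), recording wall-clock training time and mean reward for each pair.\n",
"\n",
"The next cell is a minimal setup sketch for a fresh Colab runtime. The pinned versions are assumptions (Stable Baselines 2.x requires TensorFlow 1.x, and gym 0.15.3 matches the version check below), not something this notebook verifies."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Assumed setup for a fresh runtime; versions are illustrative.\n",
"# !pip install tensorflow==1.15 gym==0.15.3 stable-baselines"
],
"execution_count": 0,
"outputs": []
},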
{
"cell_type": "code",
"metadata": {
"id": "PlJhNeqXlZxw",
"colab_type": "code",
"colab": {}
},
"source": [
"import gym\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"plt.style.use('fivethirtyeight')\n",
"\n",
"import pandas as pd\n",
"import imageio\n",
"import time\n",
"import numpy as np\n",
"\n",
"# Vectorized environments, policies and algorithms from Stable Baselines\n",
"from stable_baselines.common.vec_env import DummyVecEnv, VecVideoRecorder, SubprocVecEnv\n",
"from stable_baselines.common.policies import MlpLstmPolicy, CnnLstmPolicy, MlpPolicy\n",
"from stable_baselines import A2C, PPO2, SAC, TD3, TRPO, DDPG, ACER, ACKTR\n",
"from stable_baselines.common.evaluation import evaluate_policy\n",
"from stable_baselines.common import set_global_seeds\n",
"\n",
"from stable_baselines.bench import Monitor\n",
"from stable_baselines.results_plotter import load_results, ts2xy"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "1IFI9-VWlZx5",
"colab_type": "code",
"colab": {},
"outputId": "11ab22f5-9ee7-4ba4-b850-64bd7fd58ccc"
},
"source": [
"gym.__version__"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'0.15.3'"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "HnakU1vdlZx9",
"colab_type": "code",
"colab": {}
},
"source": [
"def make_env(env_id, rank, seed=0):\n",
"    \"\"\"\n",
"    Utility function for a multiprocessed env.\n",
"\n",
"    :param env_id: (str) the environment ID\n",
"    :param rank: (int) index of the subprocess\n",
"    :param seed: (int) the initial seed for the RNG\n",
"    \"\"\"\n",
"    def _init():\n",
"        env = gym.make(env_id)\n",
"        env.seed(seed + rank)\n",
"        return env\n",
"    set_global_seeds(seed)\n",
"    return _init\n",
"\n",
"\n",
"def evaluate(model, env, num_steps=1000):\n",
"    \"\"\"\n",
"    Evaluate an RL agent on a vectorized environment.\n",
"\n",
"    :param model: (BaseRLModel object) the RL agent\n",
"    :param env: (VecEnv) the environment to evaluate on\n",
"    :param num_steps: (int) number of timesteps to evaluate for\n",
"    :return: (float) mean reward\n",
"    \"\"\"\n",
"    episode_rewards = [[0.0] for _ in range(env.num_envs)]\n",
"    obs = env.reset()\n",
"    for _ in range(num_steps):\n",
"        # _states are only useful when using LSTM policies\n",
"        actions, _states = model.predict(obs)\n",
"        # actions, rewards and dones are arrays\n",
"        # because we are using a vectorized env\n",
"        obs, rewards, dones, info = env.step(actions)\n",
"\n",
"        # Accumulate per-env rewards; start a new episode tally on done\n",
"        for i in range(env.num_envs):\n",
"            episode_rewards[i][-1] += rewards[i]\n",
"            if dones[i]:\n",
"                episode_rewards[i].append(0.0)\n",
"\n",
"    mean_rewards = [0.0 for _ in range(env.num_envs)]\n",
"    n_episodes = 0\n",
"    for i in range(env.num_envs):\n",
"        mean_rewards[i] = np.mean(episode_rewards[i])\n",
"        n_episodes += len(episode_rewards[i])\n",
"\n",
"    # Mean reward across the vectorized environments\n",
"    mean_reward = round(np.mean(mean_rewards), 1)\n",
"    print(\"Mean reward:\", mean_reward, \"Num episodes:\", n_episodes)\n",
"\n",
"    return mean_reward"
],
"execution_count": 0,
"outputs": []
},
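{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick smoke test of the two helpers above. This is a sketch: the environment ID, process count and step counts are illustrative and separate from the benchmark run."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Illustrative: 2-process CartPole env, a short A2C run, then evaluate().\n",
"demo_env = SubprocVecEnv([make_env('CartPole-v1', i) for i in range(2)])\n",
"demo_model = A2C('MlpPolicy', demo_env)\n",
"demo_model.learn(total_timesteps=5000)\n",
"evaluate(demo_model, demo_env, num_steps=1000)\n",
"demo_env.close()"
],
"execution_count": 0,
"outputs": []
},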
{
"cell_type": "code",
"metadata": {
"id": "PDW8hNq_lZyA",
"colab_type": "code",
"colab": {}
},
"source": [
"# Optional PPO2 hyperparameters (largely the library defaults).\n",
"# The benchmark below uses an MLP policy with default settings;\n",
"# uncomment this dict to pass them explicitly.\n",
"# ppo_params = {\"gamma\": 0.99,\n",
"#               \"n_steps\": 128,\n",
"#               \"ent_coef\": 0.01,\n",
"#               \"learning_rate\": 0.00025,\n",
"#               \"vf_coef\": 0.5,\n",
"#               \"max_grad_norm\": 0.5,\n",
"#               \"lam\": 0.95,\n",
"#               \"nminibatches\": 4,\n",
"#               \"noptepochs\": 4,\n",
"#               \"cliprange\": 0.2,\n",
"#               \"cliprange_vf\": None,\n",
"#               \"verbose\": 1,\n",
"#               \"tensorboard_log\": None,\n",
"#               \"_init_setup_model\": True,\n",
"#               \"policy_kwargs\": None,\n",
"#               \"full_tensorboard_log\": False,\n",
"#               \"seed\": None,\n",
"#               \"n_cpu_tf_sess\": None\n",
"#               }"
],
"execution_count": 0,
"outputs": []
},
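{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the ppo_params dict above is uncommented, it could be applied as below (hypothetical usage; the benchmark sticks to defaults):"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Hypothetical: pass the explicit hyperparameters into PPO2.\n",
"# env = DummyVecEnv([lambda: gym.make('CartPole-v1')])\n",
"# model = PPO2('MlpPolicy', env, **ppo_params)"
],
"execution_count": 0,
"outputs": []
},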
{
"cell_type": "code",
"metadata": {
"id": "reBxig4blZyD",
"colab_type": "code",
"colab": {}
},
"source": [
"def train_save_agent(model, env, gif_name, time_steps=int(1e4),\n",
"                     save_gif=False):\n",
"    \"\"\"Train a compiled model, report the wall-clock time and\n",
"    optionally save a rollout GIF.\"\"\"\n",
"    s_time = time.time()\n",
"    # Train the model\n",
"    model.learn(total_timesteps=time_steps)\n",
"    e_time = time.time()\n",
"    tot_time = e_time - s_time\n",
"    print(f\"Total Run-Time : {tot_time:0.3f} seconds\")\n",
"\n",
"    if save_gif:\n",
"        ########### Record-GIF ###########\n",
"        images = []\n",
"        obs = model.env.reset()\n",
"        img = model.env.render(mode='rgb_array')\n",
"        gif_length = 500\n",
"\n",
"        for i in range(gif_length):\n",
"            images.append(img)\n",
"            action, _ = model.predict(obs)\n",
"            obs, _, _, _ = model.env.step(action)\n",
"            img = model.env.render(mode='rgb_array')\n",
"\n",
"        # Keep every second frame to halve the GIF size\n",
"        imageio.mimsave(f'{gif_name}-{time_steps}.gif',\n",
"                        [np.array(img) for i, img in enumerate(images) if i % 2 == 0],\n",
"                        fps=29)\n",
"\n",
"    return model, tot_time"
],
"execution_count": 0,
"outputs": []
},
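{
"cell_type": "markdown",
"metadata": {},
"source": [
"A single-run example of train_save_agent before the full benchmark loop (a sketch; environment, algorithm and step count are illustrative):"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Illustrative single run; set save_gif=True to also write a rollout GIF.\n",
"# single_env = DummyVecEnv([lambda: gym.make('CartPole-v1')])\n",
"# ppo_model = PPO2('MlpPolicy', single_env)\n",
"# ppo_model, run_secs = train_save_agent(ppo_model, single_env, 'cartpole-ppo2', time_steps=int(1e4))"
],
"execution_count": 0,
"outputs": []
},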
{
"cell_type": "code",
"metadata": {
"scrolled": true,
"id": "z87sphMklZyG",
"colab_type": "code",
"colab": {},
"outputId": "f5fbb6d8-eb0f-4caf-89f6-b8c14ade38ba"
},
"source": [
"all_algs = [\"A2C\", \"PPO2\", \"ACER\", \"ACKTR\"]\n",
"# Map names to classes instead of using eval()\n",
"alg_classes = {\"A2C\": A2C, \"PPO2\": PPO2, \"ACER\": ACER, \"ACKTR\": ACKTR}\n",
"\n",
"# Create log dir\n",
"import os\n",
"\n",
"log_dir = \"/tmp/gym/\"\n",
"os.makedirs(log_dir, exist_ok=True)\n",
"env_list = [\"Pendulum-v0\", \"MountainCar-v0\", \"Acrobot-v1\", \"CartPole-v1\"]\n",
"timesteps = int(1e6)\n",
"num_cpu = 4  # Number of processes to use\n",
"\n",
"game_df = pd.DataFrame()\n",
"\n",
"for env_id in env_list:\n",
"    print(f\"{env_id}......\")\n",
"    # Alternative single-process setups:\n",
"    # env = Monitor(gym.make(env_id), log_dir, allow_early_resets=True)\n",
"    # env = DummyVecEnv([lambda: gym.make(env_id)])\n",
"    env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])\n",
"\n",
"    alg_detail_df = pd.DataFrame()\n",
"    for alg in all_algs:\n",
"        # ACER only supports discrete action spaces, so skip it on Pendulum\n",
"        if env_id == \"Pendulum-v0\" and alg == 'ACER':\n",
"            print(f\"Skipping ACER on {env_id} (continuous action space)\")\n",
"        else:\n",
"            print(f'{alg}.....')\n",
"            model = alg_classes[alg]('MlpPolicy', env)\n",
"            tr_model, run_time = train_save_agent(model, env, alg, time_steps=timesteps, save_gif=False)\n",
"            # Alternative: evaluate_policy(tr_model, tr_model.get_env(), n_eval_episodes=20)\n",
"            mean_reward = evaluate(tr_model, env, num_steps=1000)\n",
"\n",
"            alg_detail_df = alg_detail_df.append([[env_id, alg, run_time, mean_reward]])\n",
"\n",
"            print(f\"Mean Reward : {mean_reward} \")\n",
"\n",
"    game_df = game_df.append(alg_detail_df)\n",
"    env.close()\n",
"\n",
"game_df.columns = ['Envir', 'Algorithm', 'Run_Time', 'Mean_Rewards']"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Pendulum-v0......\n",
"A2C.....\n",
"Total Run-Time : 257.970 seconds\n",
"Mean reward: -551.4 Num episodes: 24\n",
"Mean Reward : -551.4 \n",
"PPO2.....\n",
"Total Run-Time : 338.824 seconds\n",
"Mean reward: -939.9 Num episodes: 24\n",
"Mean Reward : -939.9 \n",
"Skipping ACER on Pendulum-v0 (continuous action space)\n",
"ACKTR.....\n",
"Total Run-Time : 242.154 seconds\n",
"Mean reward: -876.8 Num episodes: 24\n",
"Mean Reward : -876.8 \n",
"MountainCar-v0......\n",
"A2C.....\n",
"Total Run-Time : 237.251 seconds\n",
"Mean reward: -166.7 Num episodes: 24\n",
"Mean Reward : -166.7 \n",
"PPO2.....\n",
"Total Run-Time : 690.426 seconds\n",
"Mean reward: -166.7 Num episodes: 24\n",
"Mean Reward : -166.7 \n",
"ACER.....\n",
"Total Run-Time : 346.463 seconds\n",
"Mean reward: -166.7 Num episodes: 24\n",
"Mean Reward : -166.7 \n",
"ACKTR.....\n",
"Total Run-Time : 234.560 seconds\n",
"Mean reward: -166.7 Num episodes: 24\n",
"Mean Reward : -166.7 \n",
"Acrobot-v1......\n",
"A2C.....\n",
"Total Run-Time : 290.163 seconds\n",
"Mean reward: -89.6 Num episodes: 46\n",
"Mean Reward : -89.6 \n",
"PPO2.....\n",
"Total Run-Time : 273.282 seconds\n",
"Mean reward: -74.9 Num episodes: 53\n",
"Mean Reward : -74.9 \n",
"ACER.....\n",
"Total Run-Time : 392.469 seconds\n",
"Mean reward: -77.8 Num episodes: 51\n",
"Mean Reward : -77.8 \n",
"ACKTR.....\n",
"Total Run-Time : 253.496 seconds\n",
"Mean reward: -72.3 Num episodes: 55\n",
"Mean Reward : -72.3 \n",
"CartPole-v1......\n",
"A2C.....\n",
"Total Run-Time : 215.154 seconds\n",
"Mean reward: 333.3 Num episodes: 12\n",
"Mean Reward : 333.3 \n",
"PPO2.....\n",
"Total Run-Time : 189.328 seconds\n",
"Mean reward: 333.3 Num episodes: 12\n",
"Mean Reward : 333.3 \n",
"ACER.....\n",
"Total Run-Time : 300.396 seconds\n",
"Mean reward: 333.3 Num episodes: 12\n",
"Mean Reward : 333.3 \n",
"ACKTR.....\n",
"Total Run-Time : 189.950 seconds\n",
"Mean reward: 333.3 Num episodes: 12\n",
"Mean Reward : 333.3 \n"
],
"name": "stdout"
}
]
},
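{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick pivot of the collected results for side-by-side comparison (assumes game_df was populated by the loop above):"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# One row per environment, one column per algorithm; NaN marks the\n",
"# skipped Pendulum-v0/ACER combination.\n",
"game_df.pivot(index='Envir', columns='Algorithm', values='Mean_Rewards')"
],
"execution_count": 0,
"outputs": []
},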
{
"cell_type": "code",
"metadata": {
"id": "cBh97yTBlZyJ",
"colab_type": "code",
"colab": {}
},
"source": [
"# game_df.to_csv('Runtime_details.csv', index=False)"
],
"execution_count": 0,
"outputs": []
}
]
} |