{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Twin Delayed Deep Deterministic Policy Gradient (TD3)" | |
] | |
}, | |
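{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A brief reference for the cells below (standard TD3). Two critics are trained towards a shared target built from the target policy's smoothed action, and the actor is updated less often than the critics. The target computed in `TD3.train()` further down is\n", | |
"\n", | |
"$$\\tilde{a} = \\pi'(s') + \\epsilon, \\qquad \\epsilon \\sim \\mathrm{clip}(\\mathcal{N}(0, \\sigma), -c, c)$$\n", | |
"\n", | |
"$$y = r + \\gamma \\, (1 - d) \\, \\min\\big(Q'_1(s', \\tilde{a}), \\; Q'_2(s', \\tilde{a})\\big)$$" | |
] | |
}, | |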
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Imports" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import torch\n", | |
"import torch.nn as nn\n", | |
"from torch.autograd import Variable\n", | |
"import torch.nn.functional as F\n", | |
"from tensorboardX import SummaryWriter\n", | |
"\n", | |
"import gym\n", | |
"import roboschool\n", | |
"import sys" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Networks" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def hidden_init(layer):\n", | |
" fan_in = layer.weight.data.size()[0]\n", | |
" lim = 1. / np.sqrt(fan_in)\n", | |
" return (-lim, lim)" | |
] | |
}, | |
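{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`hidden_init` is defined here but not applied by the networks below. A minimal usage sketch (the layer is illustrative, not part of the model):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Illustrative only: initialise a hidden layer uniformly in (-1/sqrt(fan_in), 1/sqrt(fan_in))\n", | |
"example_layer = nn.Linear(400, 300)\n", | |
"nn.init.uniform_(example_layer.weight, *hidden_init(example_layer))\n", | |
"print(example_layer.weight.min().item(), example_layer.weight.max().item())" | |
] | |
}, | |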
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class Actor(nn.Module):\n", | |
" \"\"\"Initialize parameters and build model.\n", | |
" Args:\n", | |
" state_size (int): Dimension of each state\n", | |
" action_size (int): Dimension of each action\n", | |
" max_action (float): highest action to take\n", | |
" seed (int): Random seed\n", | |
" h1_units (int): Number of nodes in first hidden layer\n", | |
" h2_units (int): Number of nodes in second hidden layer\n", | |
" \n", | |
" Return:\n", | |
" action output of network with tanh activation\n", | |
" \"\"\"\n", | |
" \n", | |
" def __init__(self, state_dim, action_dim, max_action):\n", | |
" super(Actor, self).__init__()\n", | |
"\n", | |
" self.l1 = nn.Linear(state_dim, 400)\n", | |
" self.l2 = nn.Linear(400, 300)\n", | |
" self.l3 = nn.Linear(300, action_dim)\n", | |
"\n", | |
" self.max_action = max_action\n", | |
"\n", | |
"\n", | |
" def forward(self, x):\n", | |
" x = F.relu(self.l1(x))\n", | |
" x = F.relu(self.l2(x))\n", | |
" x = self.max_action * torch.tanh(self.l3(x)) \n", | |
" return x\n", | |
"\n" | |
] | |
}, | |
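{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A quick, self-contained sanity check of the actor's output shape and range. The dimensions below are placeholders, not tied to any particular environment:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Illustrative shape check with made-up dimensions; not part of training\n", | |
"_actor = Actor(state_dim=17, action_dim=6, max_action=1.0)\n", | |
"_dummy_states = torch.randn(4, 17)      # batch of 4 fake states\n", | |
"_dummy_actions = _actor(_dummy_states)\n", | |
"print(_dummy_actions.shape)             # torch.Size([4, 6])\n", | |
"print(_dummy_actions.abs().max() <= 1)  # tanh keeps |a| <= max_action" | |
] | |
}, | |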
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class Critic(nn.Module):\n", | |
" \"\"\"Initialize parameters and build model.\n", | |
" Args:\n", | |
" state_size (int): Dimension of each state\n", | |
" action_size (int): Dimension of each action\n", | |
" max_action (float): highest action to take\n", | |
" seed (int): Random seed\n", | |
" h1_units (int): Number of nodes in first hidden layer\n", | |
" h2_units (int): Number of nodes in second hidden layer\n", | |
" \n", | |
" Return:\n", | |
" value output of network \n", | |
" \"\"\"\n", | |
" \n", | |
" def __init__(self, state_dim, action_dim):\n", | |
" super(Critic, self).__init__()\n", | |
"\n", | |
" # Q1 architecture\n", | |
" self.l1 = nn.Linear(state_dim + action_dim, 400)\n", | |
" self.l2 = nn.Linear(400, 300)\n", | |
" self.l3 = nn.Linear(300, 1)\n", | |
"\n", | |
" # Q2 architecture\n", | |
" self.l4 = nn.Linear(state_dim + action_dim, 400)\n", | |
" self.l5 = nn.Linear(400, 300)\n", | |
" self.l6 = nn.Linear(300, 1)\n", | |
"\n", | |
"\n", | |
" def forward(self, x, u):\n", | |
" xu = torch.cat([x, u], 1)\n", | |
"\n", | |
" x1 = F.relu(self.l1(xu))\n", | |
" x1 = F.relu(self.l2(x1))\n", | |
" x1 = self.l3(x1)\n", | |
"\n", | |
" x2 = F.relu(self.l4(xu))\n", | |
" x2 = F.relu(self.l5(x2))\n", | |
" x2 = self.l6(x2)\n", | |
" return x1, x2\n", | |
"\n", | |
"\n", | |
" def Q1(self, x, u):\n", | |
" xu = torch.cat([x, u], 1)\n", | |
"\n", | |
" x1 = F.relu(self.l1(xu))\n", | |
" x1 = F.relu(self.l2(x1))\n", | |
" x1 = self.l3(x1)\n", | |
" return x1" | |
] | |
}, | |
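{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The critic bundles both Q-networks. A minimal check (again with placeholder sizes) that `forward` returns two estimates while `Q1` returns only the first, which is what the actor loss uses:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Illustrative check with made-up dimensions; not part of training\n", | |
"_critic = Critic(state_dim=17, action_dim=6)\n", | |
"_s = torch.randn(4, 17)\n", | |
"_a = torch.randn(4, 6)\n", | |
"_q1, _q2 = _critic(_s, _a)\n", | |
"print(_q1.shape, _q2.shape)      # torch.Size([4, 1]) twice\n", | |
"print(_critic.Q1(_s, _a).shape)  # first head only" | |
] | |
}, | |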
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Memory" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Code based on: \n", | |
"# https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py\n", | |
"\n", | |
"# Expects tuples of (state, next_state, action, reward, done)\n", | |
"class ReplayBuffer(object):\n", | |
" \"\"\"Buffer to store tuples of experience replay\"\"\"\n", | |
" \n", | |
" def __init__(self, max_size=1000000):\n", | |
" \"\"\"\n", | |
" Args:\n", | |
" max_size (int): total amount of tuples to store\n", | |
" \"\"\"\n", | |
" \n", | |
" self.storage = []\n", | |
" self.max_size = max_size\n", | |
" self.ptr = 0\n", | |
"\n", | |
" def add(self, data):\n", | |
" \"\"\"Add experience tuples to buffer\n", | |
" \n", | |
" Args:\n", | |
" data (tuple): experience replay tuple\n", | |
" \"\"\"\n", | |
" \n", | |
" if len(self.storage) == self.max_size:\n", | |
" self.storage[int(self.ptr)] = data\n", | |
" self.ptr = (self.ptr + 1) % self.max_size\n", | |
" else:\n", | |
" self.storage.append(data)\n", | |
"\n", | |
" def sample(self, batch_size):\n", | |
" \"\"\"Samples a random amount of experiences from buffer of batch size\n", | |
" \n", | |
" Args:\n", | |
" batch_size (int): size of sample\n", | |
" \"\"\"\n", | |
" \n", | |
" ind = np.random.randint(0, len(self.storage), size=batch_size)\n", | |
" states, actions, next_states, rewards, dones = [], [], [], [], []\n", | |
"\n", | |
" for i in ind: \n", | |
" s, a, s_, r, d = self.storage[i]\n", | |
" states.append(np.array(s, copy=False))\n", | |
" actions.append(np.array(a, copy=False))\n", | |
" next_states.append(np.array(s_, copy=False))\n", | |
" rewards.append(np.array(r, copy=False))\n", | |
" dones.append(np.array(d, copy=False))\n", | |
"\n", | |
" return np.array(states), np.array(actions), np.array(next_states), np.array(rewards).reshape(-1, 1), np.array(dones).reshape(-1, 1)" | |
] | |
}, | |
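{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A small demonstration of the buffer interface with toy transitions. The tuple order matches what `Runner.next_step` and `observe` store: (state, next_state, action, reward, done):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Toy transitions, just to show add()/sample(); shapes are arbitrary\n", | |
"_buf = ReplayBuffer(max_size=10)\n", | |
"for _ in range(5):\n", | |
"    _buf.add((np.zeros(3), np.ones(3), np.zeros(1), 0.0, 0.0))\n", | |
"_s, _s2, _a, _r, _d = _buf.sample(batch_size=4)\n", | |
"print(_s.shape, _s2.shape, _a.shape, _r.shape, _d.shape)  # (4, 3) (4, 3) (4, 1) (4, 1) (4, 1)" | |
] | |
}, | |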
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Agent" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class TD3(object):\n", | |
" \"\"\"Agent class that handles the training of the networks and provides outputs as actions\n", | |
" \n", | |
" Args:\n", | |
" state_dim (int): state size\n", | |
" action_dim (int): action size\n", | |
" max_action (float): highest action to take\n", | |
" device (device): cuda or cpu to process tensors\n", | |
" env (env): gym environment to use\n", | |
" \n", | |
" \"\"\"\n", | |
" \n", | |
" def __init__(self, state_dim, action_dim, max_action, env):\n", | |
" self.actor = Actor(state_dim, action_dim, max_action).to(device)\n", | |
" self.actor_target = Actor(state_dim, action_dim, max_action).to(device)\n", | |
" self.actor_target.load_state_dict(self.actor.state_dict())\n", | |
" self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=1e-3)\n", | |
"\n", | |
" self.critic = Critic(state_dim, action_dim).to(device)\n", | |
" self.critic_target = Critic(state_dim, action_dim).to(device)\n", | |
" self.critic_target.load_state_dict(self.critic.state_dict())\n", | |
" self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=1e-3)\n", | |
"\n", | |
" self.max_action = max_action\n", | |
" self.env = env\n", | |
"\n", | |
"\n", | |
" \n", | |
" def select_action(self, state, noise=0.1):\n", | |
" \"\"\"Select an appropriate action from the agent policy\n", | |
" \n", | |
" Args:\n", | |
" state (array): current state of environment\n", | |
" noise (float): how much noise to add to acitons\n", | |
" \n", | |
" Returns:\n", | |
" action (float): action clipped within action range\n", | |
" \n", | |
" \"\"\"\n", | |
" \n", | |
" state = torch.FloatTensor(state.reshape(1, -1)).to(device)\n", | |
" \n", | |
" action = self.actor(state).cpu().data.numpy().flatten()\n", | |
" if noise != 0: \n", | |
" action = (action + np.random.normal(0, noise, size=self.env.action_space.shape[0]))\n", | |
" \n", | |
" return action.clip(self.env.action_space.low, self.env.action_space.high)\n", | |
"\n", | |
" \n", | |
" def train(self, replay_buffer, iterations, batch_size=100, discount=0.99, tau=0.005, policy_noise=0.2, noise_clip=0.5, policy_freq=2):\n", | |
" \"\"\"Train and update actor and critic networks\n", | |
" \n", | |
" Args:\n", | |
" replay_buffer (ReplayBuffer): buffer for experience replay\n", | |
" iterations (int): how many times to run training\n", | |
" batch_size(int): batch size to sample from replay buffer\n", | |
" discount (float): discount factor\n", | |
" tau (float): soft update for main networks to target networks\n", | |
" \n", | |
" Return:\n", | |
" actor_loss (float): loss from actor network\n", | |
" critic_loss (float): loss from critic network\n", | |
" \n", | |
" \"\"\"\n", | |
" \n", | |
" for it in range(iterations):\n", | |
"\n", | |
" # Sample replay buffer \n", | |
" x, y, u, r, d = replay_buffer.sample(batch_size)\n", | |
" state = torch.FloatTensor(x).to(device)\n", | |
" action = torch.FloatTensor(u).to(device)\n", | |
" next_state = torch.FloatTensor(y).to(device)\n", | |
" done = torch.FloatTensor(1 - d).to(device)\n", | |
" reward = torch.FloatTensor(r).to(device)\n", | |
"\n", | |
" # Select action according to policy and add clipped noise \n", | |
" noise = torch.FloatTensor(u).data.normal_(0, policy_noise).to(device)\n", | |
" noise = noise.clamp(-noise_clip, noise_clip)\n", | |
" next_action = (self.actor_target(next_state) + noise).clamp(-self.max_action, self.max_action)\n", | |
"\n", | |
" # Compute the target Q value\n", | |
" target_Q1, target_Q2 = self.critic_target(next_state, next_action)\n", | |
" target_Q = torch.min(target_Q1, target_Q2)\n", | |
" target_Q = reward + (done * discount * target_Q).detach()\n", | |
"\n", | |
" # Get current Q estimates\n", | |
" current_Q1, current_Q2 = self.critic(state, action)\n", | |
"\n", | |
" # Compute critic loss\n", | |
" critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q) \n", | |
"\n", | |
" # Optimize the critic\n", | |
" self.critic_optimizer.zero_grad()\n", | |
" critic_loss.backward()\n", | |
" self.critic_optimizer.step()\n", | |
"\n", | |
" # Delayed policy updates\n", | |
" if it % policy_freq == 0:\n", | |
"\n", | |
" # Compute actor loss\n", | |
" actor_loss = -self.critic.Q1(state, self.actor(state)).mean()\n", | |
"\n", | |
" # Optimize the actor \n", | |
" self.actor_optimizer.zero_grad()\n", | |
" actor_loss.backward()\n", | |
" self.actor_optimizer.step()\n", | |
"\n", | |
" # Update the frozen target models\n", | |
" for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):\n", | |
" target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)\n", | |
"\n", | |
" for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):\n", | |
" target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)\n", | |
"\n", | |
"\n", | |
" def save(self, filename, directory):\n", | |
" torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, filename))\n", | |
" torch.save(self.critic.state_dict(), '%s/%s_critic.pth' % (directory, filename))\n", | |
"\n", | |
"\n", | |
" def load(self, filename=\"best_avg\", directory=\"./saves\"):\n", | |
" self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename)))\n", | |
" self.critic.load_state_dict(torch.load('%s/%s_critic.pth' % (directory, filename)))" | |
] | |
}, | |
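{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The target-policy smoothing inside `train` can be seen in isolation on a dummy tensor. This mirrors the noise/clamp lines above; values and shapes are arbitrary:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Standalone illustration of clipped Gaussian smoothing on target actions\n", | |
"_a_target = torch.zeros(4, 6)  # stand-in for actor_target(next_state)\n", | |
"_noise = torch.zeros_like(_a_target).normal_(0, 0.2).clamp(-0.5, 0.5)\n", | |
"_next_action = (_a_target + _noise).clamp(-1.0, 1.0)\n", | |
"print(_next_action.min().item(), _next_action.max().item())  # stays inside [-1, 1]" | |
] | |
}, | |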
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Runner" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class Runner():\n", | |
" \"\"\"Carries out the environment steps and adds experiences to memory\"\"\"\n", | |
" \n", | |
" def __init__(self, env, agent, replay_buffer):\n", | |
" \n", | |
" self.env = env\n", | |
" self.agent = agent\n", | |
" self.replay_buffer = replay_buffer\n", | |
" self.obs = env.reset()\n", | |
" self.done = False\n", | |
" \n", | |
" def next_step(self, episode_timesteps, noise=0.1):\n", | |
" \n", | |
" action = self.agent.select_action(np.array(self.obs), noise=0.1)\n", | |
" \n", | |
" # Perform action\n", | |
" new_obs, reward, done, _ = self.env.step(action) \n", | |
" done_bool = 0 if episode_timesteps + 1 == 200 else float(done)\n", | |
" \n", | |
" # Store data in replay buffer\n", | |
" replay_buffer.add((self.obs, new_obs, action, reward, done_bool))\n", | |
" \n", | |
" self.obs = new_obs\n", | |
" \n", | |
" if done:\n", | |
" self.obs = self.env.reset()\n", | |
" done = False\n", | |
" \n", | |
" return reward, True\n", | |
" \n", | |
" return reward, done" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Evaluate" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def evaluate_policy(policy, env, eval_episodes=100,render=False):\n", | |
" \"\"\"run several episodes using the best agent policy\n", | |
" \n", | |
" Args:\n", | |
" policy (agent): agent to evaluate\n", | |
" env (env): gym environment\n", | |
" eval_episodes (int): how many test episodes to run\n", | |
" render (bool): show training\n", | |
" \n", | |
" Returns:\n", | |
" avg_reward (float): average reward over the number of evaluations\n", | |
" \n", | |
" \"\"\"\n", | |
" \n", | |
" avg_reward = 0.\n", | |
" for i in range(eval_episodes):\n", | |
" obs = env.reset()\n", | |
" done = False\n", | |
" while not done:\n", | |
" if render:\n", | |
" env.render()\n", | |
" action = policy.select_action(np.array(obs), noise=0)\n", | |
" obs, reward, done, _ = env.step(action)\n", | |
" avg_reward += reward\n", | |
"\n", | |
" avg_reward /= eval_episodes\n", | |
"\n", | |
" print(\"\\n---------------------------------------\")\n", | |
" print(\"Evaluation over {:d} episodes: {:f}\" .format(eval_episodes, avg_reward))\n", | |
" print(\"---------------------------------------\")\n", | |
" return avg_reward" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Observation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def observe(env,replay_buffer, observation_steps):\n", | |
" \"\"\"run episodes while taking random actions and filling replay_buffer\n", | |
" \n", | |
" Args:\n", | |
" env (env): gym environment\n", | |
" replay_buffer(ReplayBuffer): buffer to store experience replay\n", | |
" observation_steps (int): how many steps to observe for\n", | |
" \n", | |
" \"\"\"\n", | |
" \n", | |
" time_steps = 0\n", | |
" obs = env.reset()\n", | |
" done = False\n", | |
"\n", | |
" while time_steps < observation_steps:\n", | |
" action = env.action_space.sample()\n", | |
" new_obs, reward, done, _ = env.step(action)\n", | |
"\n", | |
" replay_buffer.add((obs, new_obs, action, reward, done))\n", | |
"\n", | |
" obs = new_obs\n", | |
" time_steps += 1\n", | |
"\n", | |
" if done:\n", | |
" obs = env.reset()\n", | |
" done = False\n", | |
"\n", | |
" print(\"\\rPopulating Buffer {}/{}.\".format(time_steps, observation_steps), end=\"\")\n", | |
" sys.stdout.flush()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def train(agent, test_env):\n", | |
" \"\"\"Train the agent for exploration steps\n", | |
" \n", | |
" Args:\n", | |
" agent (Agent): agent to use\n", | |
" env (environment): gym environment\n", | |
" writer (SummaryWriter): tensorboard writer\n", | |
" exploration (int): how many training steps to run\n", | |
" \n", | |
" \"\"\"\n", | |
"\n", | |
" total_timesteps = 0\n", | |
" timesteps_since_eval = 0\n", | |
" episode_num = 0\n", | |
" episode_reward = 0\n", | |
" episode_timesteps = 0\n", | |
" done = False \n", | |
" obs = env.reset()\n", | |
" evaluations = []\n", | |
" rewards = []\n", | |
" best_avg = -2000\n", | |
" \n", | |
" writer = SummaryWriter(comment=\"-TD3_Baseline_HalfCheetah\")\n", | |
" \n", | |
" while total_timesteps < EXPLORATION:\n", | |
" \n", | |
" if done: \n", | |
"\n", | |
" if total_timesteps != 0: \n", | |
" rewards.append(episode_reward)\n", | |
" avg_reward = np.mean(rewards[-100:])\n", | |
" \n", | |
" writer.add_scalar(\"avg_reward\", avg_reward, total_timesteps)\n", | |
" writer.add_scalar(\"reward_step\", reward, total_timesteps)\n", | |
" writer.add_scalar(\"episode_reward\", episode_reward, total_timesteps)\n", | |
" \n", | |
" if best_avg < avg_reward:\n", | |
" best_avg = avg_reward\n", | |
" print(\"saving best model....\\n\")\n", | |
" agent.save(\"best_avg\",\"saves\")\n", | |
"\n", | |
" print(\"\\rTotal T: {:d} Episode Num: {:d} Reward: {:f} Avg Reward: {:f}\".format(\n", | |
" total_timesteps, episode_num, episode_reward, avg_reward), end=\"\")\n", | |
" sys.stdout.flush()\n", | |
"\n", | |
"\n", | |
" if avg_reward >= REWARD_THRESH:\n", | |
" break\n", | |
"\n", | |
" agent.train(replay_buffer, episode_timesteps, BATCH_SIZE, GAMMA, TAU, NOISE, NOISE_CLIP, POLICY_FREQUENCY)\n", | |
"\n", | |
" # Evaluate episode\n", | |
"# if timesteps_since_eval >= EVAL_FREQUENCY:\n", | |
"# timesteps_since_eval %= EVAL_FREQUENCY\n", | |
"# eval_reward = evaluate_policy(agent, test_env)\n", | |
"# evaluations.append(avg_reward)\n", | |
"# writer.add_scalar(\"eval_reward\", eval_reward, total_timesteps)\n", | |
"\n", | |
"# if best_avg < eval_reward:\n", | |
"# best_avg = eval_reward\n", | |
"# print(\"saving best model....\\n\")\n", | |
"# agent.save(\"best_avg\",\"saves\")\n", | |
"\n", | |
" episode_reward = 0\n", | |
" episode_timesteps = 0\n", | |
" episode_num += 1 \n", | |
"\n", | |
" reward, done = runner.next_step(episode_timesteps)\n", | |
" episode_reward += reward\n", | |
"\n", | |
" episode_timesteps += 1\n", | |
" total_timesteps += 1\n", | |
" timesteps_since_eval += 1" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Config" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ENV = \"RoboschoolHalfCheetah-v1\"#\"Pendulum-v0\"\n", | |
"SEED = 0\n", | |
"OBSERVATION = 10000\n", | |
"EXPLORATION = 5000000\n", | |
"BATCH_SIZE = 100\n", | |
"GAMMA = 0.99\n", | |
"TAU = 0.005\n", | |
"NOISE = 0.2\n", | |
"NOISE_CLIP = 0.5\n", | |
"EXPLORE_NOISE = 0.1\n", | |
"POLICY_FREQUENCY = 2\n", | |
"EVAL_FREQUENCY = 5000\n", | |
"REWARD_THRESH = 8000" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Main" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"env = gym.make(ENV)\n", | |
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", | |
"\n", | |
"# Set seeds\n", | |
"env.seed(SEED)\n", | |
"torch.manual_seed(SEED)\n", | |
"np.random.seed(SEED)\n", | |
"\n", | |
"state_dim = env.observation_space.shape[0]\n", | |
"action_dim = env.action_space.shape[0] \n", | |
"max_action = float(env.action_space.high[0])\n", | |
"\n", | |
"policy = TD3(state_dim, action_dim, max_action, env)\n", | |
"\n", | |
"replay_buffer = ReplayBuffer()\n", | |
"\n", | |
"runner = Runner(env, policy, replay_buffer)\n", | |
"\n", | |
"total_timesteps = 0\n", | |
"timesteps_since_eval = 0\n", | |
"episode_num = 0\n", | |
"done = True" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Populating Buffer 10000/10000." | |
] | |
} | |
], | |
"source": [ | |
"# Populate replay buffer\n", | |
"observe(env, replay_buffer, OBSERVATION)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"saving best model....\n", | |
"\n", | |
"Total T: 20 Episode Num: 0 Reward: 16.743121 Avg Reward: 16.743121saving best model....\n", | |
"\n", | |
"Total T: 1514 Episode Num: 58 Reward: 21.769903 Avg Reward: 20.342496saving best model....\n", | |
"\n", | |
"Total T: 1536 Episode Num: 59 Reward: 22.781277 Avg Reward: 20.383142saving best model....\n", | |
"\n", | |
"Total T: 1582 Episode Num: 61 Reward: 17.289137 Avg Reward: 20.385368saving best model....\n", | |
"\n", | |
"Total T: 1630 Episode Num: 63 Reward: 19.862503 Avg Reward: 20.485710saving best model....\n", | |
"\n", | |
"Total T: 1650 Episode Num: 64 Reward: 21.866313 Avg Reward: 20.506950saving best model....\n", | |
"\n", | |
"Total T: 1675 Episode Num: 65 Reward: 26.819443 Avg Reward: 20.602593saving best model....\n", | |
"\n", | |
"Total T: 1700 Episode Num: 66 Reward: 23.843442 Avg Reward: 20.650964saving best model....\n", | |
"\n", | |
"Total T: 1726 Episode Num: 67 Reward: 21.144815 Avg Reward: 20.658227saving best model....\n", | |
"\n", | |
"Total T: 1750 Episode Num: 68 Reward: 24.633037 Avg Reward: 20.715833saving best model....\n", | |
"\n", | |
"Total T: 1773 Episode Num: 69 Reward: 21.303502 Avg Reward: 20.724228saving best model....\n", | |
"\n", | |
"Total T: 1796 Episode Num: 70 Reward: 22.716227 Avg Reward: 20.752284saving best model....\n", | |
"\n", | |
"Total T: 1819 Episode Num: 71 Reward: 20.989999 Avg Reward: 20.755586saving best model....\n", | |
"\n", | |
"Total T: 1843 Episode Num: 72 Reward: 23.784038 Avg Reward: 20.797072saving best model....\n", | |
"\n", | |
"Total T: 1865 Episode Num: 73 Reward: 23.528150 Avg Reward: 20.833978saving best model....\n", | |
"\n", | |
"Total T: 1909 Episode Num: 75 Reward: 19.643384 Avg Reward: 20.848522saving best model....\n", | |
"\n", | |
"Total T: 1931 Episode Num: 76 Reward: 24.425013 Avg Reward: 20.894970saving best model....\n", | |
"\n", | |
"Total T: 1996 Episode Num: 79 Reward: 23.952797 Avg Reward: 20.882782saving best model....\n", | |
"\n", | |
"Total T: 2106 Episode Num: 84 Reward: 24.645263 Avg Reward: 20.923955saving best model....\n", | |
"\n", | |
"Total T: 2129 Episode Num: 85 Reward: 23.699468 Avg Reward: 20.956229saving best model....\n", | |
"\n", | |
"Total T: 2156 Episode Num: 86 Reward: 26.974068 Avg Reward: 21.025399saving best model....\n", | |
"\n", | |
"Total T: 2180 Episode Num: 87 Reward: 25.211616 Avg Reward: 21.072970saving best model....\n", | |
"\n", | |
"Total T: 2204 Episode Num: 88 Reward: 23.616154 Avg Reward: 21.101545saving best model....\n", | |
"\n", | |
"Total T: 2232 Episode Num: 89 Reward: 22.625034 Avg Reward: 21.118473saving best model....\n", | |
"\n", | |
"Total T: 2258 Episode Num: 90 Reward: 25.570383 Avg Reward: 21.167395saving best model....\n", | |
"\n", | |
"Total T: 2365 Episode Num: 95 Reward: 20.145823 Avg Reward: 21.152679saving best model....\n", | |
"\n", | |
"Total T: 2387 Episode Num: 96 Reward: 24.185485 Avg Reward: 21.183945saving best model....\n", | |
"\n", | |
"Total T: 2408 Episode Num: 97 Reward: 22.592442 Avg Reward: 21.198317saving best model....\n", | |
"\n", | |
"Total T: 2430 Episode Num: 98 Reward: 21.265422 Avg Reward: 21.198995saving best model....\n", | |
"\n", | |
"Total T: 2451 Episode Num: 99 Reward: 22.979675 Avg Reward: 21.216802saving best model....\n", | |
"\n", | |
"Total T: 2493 Episode Num: 101 Reward: 21.581048 Avg Reward: 21.237202saving best model....\n", | |
"\n", | |
"Total T: 2516 Episode Num: 102 Reward: 18.065544 Avg Reward: 21.451748saving best model....\n", | |
"\n", | |
"Total T: 2549 Episode Num: 103 Reward: 23.354547 Avg Reward: 21.635259saving best model....\n", | |
"\n", | |
"Total T: 2568 Episode Num: 104 Reward: 18.699222 Avg Reward: 21.684347saving best model....\n", | |
"\n", | |
"Total T: 2590 Episode Num: 105 Reward: 23.184260 Avg Reward: 21.863509saving best model....\n", | |
"\n", | |
"Total T: 2607 Episode Num: 106 Reward: 17.421441 Avg Reward: 21.929896saving best model....\n", | |
"\n", | |
"Total T: 2628 Episode Num: 107 Reward: 22.084720 Avg Reward: 22.046856saving best model....\n", | |
"\n", | |
"Total T: 2673 Episode Num: 109 Reward: 24.765999 Avg Reward: 22.084884saving best model....\n", | |
"\n", | |
"Total T: 2693 Episode Num: 110 Reward: 20.889029 Avg Reward: 22.143954saving best model....\n", | |
"\n", | |
"Total T: 2737 Episode Num: 112 Reward: 18.132201 Avg Reward: 22.184750saving best model....\n", | |
"\n", | |
"Total T: 2779 Episode Num: 114 Reward: 20.979700 Avg Reward: 22.232221saving best model....\n", | |
"\n", | |
"Total T: 2800 Episode Num: 115 Reward: 23.987061 Avg Reward: 22.312318saving best model....\n", | |
"\n", | |
"Total T: 2822 Episode Num: 116 Reward: 22.098923 Avg Reward: 22.398968saving best model....\n", | |
"\n", | |
"Total T: 2844 Episode Num: 117 Reward: 20.284459 Avg Reward: 22.443698saving best model....\n", | |
"\n", | |
"Total T: 2865 Episode Num: 118 Reward: 23.519466 Avg Reward: 22.536493saving best model....\n", | |
"\n", | |
"Total T: 8075 Episode Num: 337 Reward: 22.603253 Avg Reward: 22.596228saving best model....\n", | |
"\n", | |
"Total T: 8102 Episode Num: 338 Reward: 24.827601 Avg Reward: 22.665031saving best model....\n", | |
"\n", | |
"Total T: 8125 Episode Num: 339 Reward: 20.482397 Avg Reward: 22.749411saving best model....\n", | |
"\n", | |
"Total T: 8154 Episode Num: 340 Reward: 29.937905 Avg Reward: 22.876361saving best model....\n", | |
"\n", | |
"Total T: 8178 Episode Num: 341 Reward: 21.904997 Avg Reward: 22.932600saving best model....\n", | |
"\n", | |
"Total T: 8205 Episode Num: 342 Reward: 30.164619 Avg Reward: 23.048934saving best model....\n", | |
"\n", | |
"Total T: 8231 Episode Num: 343 Reward: 22.462488 Avg Reward: 23.131638saving best model....\n", | |
"\n", | |
"Total T: 8269 Episode Num: 344 Reward: 38.728877 Avg Reward: 23.355593saving best model....\n", | |
"\n", | |
"Total T: 8323 Episode Num: 345 Reward: 49.055836 Avg Reward: 23.680787saving best model....\n", | |
"\n", | |
"Total T: 8353 Episode Num: 346 Reward: 29.906956 Avg Reward: 23.817346saving best model....\n", | |
"\n", | |
"Total T: 8400 Episode Num: 347 Reward: 42.409426 Avg Reward: 24.036897saving best model....\n", | |
"\n", | |
"Total T: 8422 Episode Num: 348 Reward: 17.867632 Avg Reward: 24.079333saving best model....\n", | |
"\n", | |
"Total T: 8444 Episode Num: 349 Reward: 17.905481 Avg Reward: 24.098456saving best model....\n", | |
"\n", | |
"Total T: 8473 Episode Num: 350 Reward: 27.287787 Avg Reward: 24.234559saving best model....\n", | |
"\n", | |
"Total T: 8535 Episode Num: 351 Reward: 38.570854 Avg Reward: 24.444101saving best model....\n", | |
"\n", | |
"Total T: 8579 Episode Num: 352 Reward: 33.785010 Avg Reward: 24.617698saving best model....\n", | |
"\n", | |
"Total T: 8608 Episode Num: 353 Reward: 29.897824 Avg Reward: 24.753943saving best model....\n", | |
"\n", | |
"Total T: 8641 Episode Num: 354 Reward: 36.745220 Avg Reward: 24.953284saving best model....\n", | |
"\n", | |
"Total T: 8689 Episode Num: 356 Reward: 16.709570 Avg Reward: 25.022410saving best model....\n", | |
"\n", | |
"Total T: 8728 Episode Num: 357 Reward: 42.953937 Avg Reward: 25.276442saving best model....\n", | |
"\n", | |
"Total T: 8753 Episode Num: 358 Reward: 20.088823 Avg Reward: 25.306779saving best model....\n", | |
"\n", | |
"Total T: 8780 Episode Num: 359 Reward: 22.650081 Avg Reward: 25.323501saving best model....\n", | |
"\n", | |
"Total T: 8808 Episode Num: 360 Reward: 27.804353 Avg Reward: 25.459005saving best model....\n", | |
"\n", | |
"Total T: 8841 Episode Num: 361 Reward: 40.678614 Avg Reward: 25.564849saving best model....\n", | |
"\n", | |
"Total T: 8870 Episode Num: 362 Reward: 34.146334 Avg Reward: 25.747527saving best model....\n", | |
"\n", | |
"Total T: 8899 Episode Num: 363 Reward: 22.897403 Avg Reward: 25.807958saving best model....\n", | |
"\n", | |
"Total T: 8935 Episode Num: 364 Reward: 27.907736 Avg Reward: 25.837514saving best model....\n", | |
"\n", | |
"Total T: 9021 Episode Num: 365 Reward: 43.411862 Avg Reward: 26.118753saving best model....\n", | |
"\n", | |
"Total T: 9060 Episode Num: 366 Reward: 43.043610 Avg Reward: 26.442973saving best model....\n", | |
"\n", | |
"Total T: 9110 Episode Num: 368 Reward: 21.099034 Avg Reward: 26.331570saving best model....\n", | |
"\n", | |
"Total T: 9153 Episode Num: 369 Reward: 44.194996 Avg Reward: 26.624449saving best model....\n", | |
"\n", | |
"Total T: 9193 Episode Num: 370 Reward: 32.725990 Avg Reward: 26.745011saving best model....\n", | |
"\n", | |
"Total T: 9225 Episode Num: 371 Reward: 29.689769 Avg Reward: 26.786023saving best model....\n", | |
"\n", | |
"Total T: 9271 Episode Num: 372 Reward: 25.130714 Avg Reward: 26.820340saving best model....\n", | |
"\n", | |
"Total T: 9302 Episode Num: 373 Reward: 33.233439 Avg Reward: 26.936862saving best model....\n", | |
"\n", | |
"Total T: 9336 Episode Num: 374 Reward: 32.769324 Avg Reward: 27.026555saving best model....\n", | |
"\n", | |
"Total T: 9368 Episode Num: 375 Reward: 26.749514 Avg Reward: 27.116073saving best model....\n", | |
"\n", | |
"Total T: 9515 Episode Num: 376 Reward: 88.880520 Avg Reward: 27.797828saving best model....\n", | |
"\n", | |
"Total T: 9542 Episode Num: 377 Reward: 25.058426 Avg Reward: 27.841152saving best model....\n", | |
"\n", | |
"Total T: 9655 Episode Num: 378 Reward: 86.434673 Avg Reward: 28.506462saving best model....\n", | |
"\n", | |
"Total T: 9682 Episode Num: 379 Reward: 21.743220 Avg Reward: 28.511879saving best model....\n", | |
"\n", | |
"Total T: 9734 Episode Num: 380 Reward: 48.613742 Avg Reward: 28.727580saving best model....\n", | |
"\n", | |
"Total T: 9806 Episode Num: 381 Reward: 28.639700 Avg Reward: 28.733419saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 10769 Episode Num: 382 Reward: 539.815619 Avg Reward: 33.930442saving best model....\n", | |
"\n", | |
"Total T: 11769 Episode Num: 383 Reward: 566.808983 Avg Reward: 39.388113saving best model....\n", | |
"\n", | |
"Total T: 11853 Episode Num: 384 Reward: 67.279419 Avg Reward: 39.876748saving best model....\n", | |
"\n", | |
"Total T: 11957 Episode Num: 385 Reward: 71.503203 Avg Reward: 40.310240saving best model....\n", | |
"\n", | |
"Total T: 12046 Episode Num: 386 Reward: 77.520739 Avg Reward: 40.875106saving best model....\n", | |
"\n", | |
"Total T: 12429 Episode Num: 387 Reward: 246.536070 Avg Reward: 43.146199saving best model....\n", | |
"\n", | |
"Total T: 13429 Episode Num: 388 Reward: 576.103617 Avg Reward: 48.686063saving best model....\n", | |
"\n", | |
"Total T: 13577 Episode Num: 391 Reward: 21.495391 Avg Reward: 48.472590saving best model....\n", | |
"\n", | |
"Total T: 13650 Episode Num: 392 Reward: 54.999420 Avg Reward: 48.824773saving best model....\n", | |
"\n", | |
"Total T: 14206 Episode Num: 393 Reward: 205.837476 Avg Reward: 50.679812saving best model....\n", | |
"\n", | |
"Total T: 14278 Episode Num: 394 Reward: 65.735856 Avg Reward: 51.097288saving best model....\n", | |
"\n", | |
"Total T: 14353 Episode Num: 395 Reward: 61.474123 Avg Reward: 51.429518saving best model....\n", | |
"\n", | |
"Total T: 14408 Episode Num: 396 Reward: 52.433851 Avg Reward: 51.622611saving best model....\n", | |
"\n", | |
"Total T: 14460 Episode Num: 397 Reward: 45.060072 Avg Reward: 51.838137saving best model....\n", | |
"\n", | |
"Total T: 14549 Episode Num: 398 Reward: 72.634526 Avg Reward: 52.288803saving best model....\n", | |
"\n", | |
"Total T: 14630 Episode Num: 399 Reward: 76.795412 Avg Reward: 52.838007saving best model....\n", | |
"\n", | |
"Total T: 14690 Episode Num: 400 Reward: 62.358343 Avg Reward: 53.129828saving best model....\n", | |
"\n", | |
"Total T: 14767 Episode Num: 401 Reward: 62.494705 Avg Reward: 53.537558saving best model....\n", | |
"\n", | |
"Total T: 15825 Episode Num: 403 Reward: 9.949623 Avg Reward: 53.7445628saving best model....\n", | |
"\n", | |
"Total T: 15975 Episode Num: 404 Reward: 93.993168 Avg Reward: 54.462463saving best model....\n", | |
"\n", | |
"Total T: 16053 Episode Num: 405 Reward: 77.928995 Avg Reward: 54.958081saving best model....\n", | |
"\n", | |
"Total T: 16120 Episode Num: 406 Reward: 58.549251 Avg Reward: 55.245463saving best model....\n", | |
"\n", | |
"Total T: 16500 Episode Num: 407 Reward: 117.493571 Avg Reward: 55.933688saving best model....\n", | |
"\n", | |
"Total T: 16703 Episode Num: 408 Reward: 86.696944 Avg Reward: 56.529788saving best model....\n", | |
"\n", | |
"Total T: 16845 Episode Num: 409 Reward: 91.451277 Avg Reward: 57.088445saving best model....\n", | |
"\n", | |
"Total T: 17246 Episode Num: 413 Reward: 33.851439 Avg Reward: 57.3138992saving best model....\n", | |
"\n", | |
"Total T: 19487 Episode Num: 457 Reward: 57.267046 Avg Reward: 57.9176036saving best model....\n", | |
"\n", | |
"Total T: 19537 Episode Num: 458 Reward: 51.000588 Avg Reward: 58.226720saving best model....\n", | |
"\n", | |
"Total T: 19585 Episode Num: 459 Reward: 50.714799 Avg Reward: 58.507367saving best model....\n", | |
"\n", | |
"Total T: 19623 Episode Num: 460 Reward: 40.609265 Avg Reward: 58.635417saving best model....\n", | |
"\n", | |
"Total T: 19722 Episode Num: 461 Reward: 69.569158 Avg Reward: 58.924322saving best model....\n", | |
"\n", | |
"Total T: 20143 Episode Num: 462 Reward: 201.738595 Avg Reward: 60.600245saving best model....\n", | |
"\n", | |
"Total T: 20196 Episode Num: 463 Reward: 54.189743 Avg Reward: 60.913168saving best model....\n", | |
"\n", | |
"Total T: 20256 Episode Num: 464 Reward: 57.838426 Avg Reward: 61.212475saving best model....\n", | |
"\n", | |
"Total T: 20407 Episode Num: 465 Reward: 101.113747 Avg Reward: 61.789494saving best model....\n", | |
"\n", | |
"Total T: 20466 Episode Num: 466 Reward: 65.217016 Avg Reward: 62.011228saving best model....\n", | |
"\n", | |
"Total T: 20550 Episode Num: 467 Reward: 84.866026 Avg Reward: 62.606770saving best model....\n", | |
"\n", | |
"Total T: 20643 Episode Num: 468 Reward: 91.502709 Avg Reward: 63.310807saving best model....\n", | |
"\n", | |
"Total T: 20744 Episode Num: 469 Reward: 86.479545 Avg Reward: 63.733653saving best model....\n", | |
"\n", | |
"Total T: 20822 Episode Num: 470 Reward: 73.225521 Avg Reward: 64.138648saving best model....\n", | |
"\n", | |
"Total T: 20898 Episode Num: 471 Reward: 75.831572 Avg Reward: 64.600066saving best model....\n", | |
"\n", | |
"Total T: 21136 Episode Num: 472 Reward: 148.298909 Avg Reward: 65.831748saving best model....\n", | |
"\n", | |
"Total T: 21246 Episode Num: 473 Reward: 59.654322 Avg Reward: 66.095957saving best model....\n", | |
"\n", | |
"Total T: 21366 Episode Num: 474 Reward: 47.413121 Avg Reward: 66.242395saving best model....\n", | |
"\n", | |
"Total T: 21555 Episode Num: 476 Reward: 40.351045 Avg Reward: 66.447763saving best model....\n", | |
"\n", | |
"Total T: 21694 Episode Num: 477 Reward: 96.448310 Avg Reward: 67.161662saving best model....\n", | |
"\n", | |
"Total T: 22016 Episode Num: 478 Reward: 207.061984 Avg Reward: 68.367935saving best model....\n", | |
"\n", | |
"Total T: 22231 Episode Num: 479 Reward: 141.467320 Avg Reward: 69.565176saving best model....\n", | |
"\n", | |
"Total T: 22365 Episode Num: 480 Reward: 102.487831 Avg Reward: 70.103917saving best model....\n", | |
"\n", | |
"Total T: 23637 Episode Num: 484 Reward: 593.477348 Avg Reward: 66.445456saving best model....\n", | |
"\n", | |
"Total T: 24637 Episode Num: 485 Reward: 710.934881 Avg Reward: 72.839772saving best model....\n", | |
"\n", | |
"Total T: 27672 Episode Num: 517 Reward: 62.596648 Avg Reward: 73.0511290saving best model....\n", | |
"\n", | |
"Total T: 27734 Episode Num: 518 Reward: 67.672126 Avg Reward: 73.781579saving best model....\n", | |
"\n", | |
"Total T: 27785 Episode Num: 519 Reward: 57.932271 Avg Reward: 74.421600saving best model....\n", | |
"\n", | |
"Total T: 27835 Episode Num: 520 Reward: 55.663959 Avg Reward: 74.996262saving best model....\n", | |
"\n", | |
"Total T: 27933 Episode Num: 521 Reward: 87.078968 Avg Reward: 75.914216saving best model....\n", | |
"\n", | |
"Total T: 27974 Episode Num: 522 Reward: 38.165231 Avg Reward: 76.343365saving best model....\n", | |
"\n", | |
"Total T: 28072 Episode Num: 523 Reward: 90.268251 Avg Reward: 77.319059saving best model....\n", | |
"\n", | |
"Total T: 28155 Episode Num: 524 Reward: 85.209868 Avg Reward: 78.244842saving best model....\n", | |
"\n", | |
"Total T: 28215 Episode Num: 525 Reward: 56.868200 Avg Reward: 78.846508saving best model....\n", | |
"\n", | |
"Total T: 28301 Episode Num: 526 Reward: 78.205942 Avg Reward: 79.673087saving best model....\n", | |
"\n", | |
"Total T: 28390 Episode Num: 527 Reward: 83.921447 Avg Reward: 80.536428saving best model....\n", | |
"\n", | |
"Total T: 28540 Episode Num: 528 Reward: 108.993770 Avg Reward: 81.581168saving best model....\n", | |
"\n", | |
"Total T: 28625 Episode Num: 529 Reward: 76.458615 Avg Reward: 82.325346saving best model....\n", | |
"\n", | |
"Total T: 28698 Episode Num: 530 Reward: 71.806796 Avg Reward: 82.979184saving best model....\n", | |
"\n", | |
"Total T: 28782 Episode Num: 531 Reward: 82.587037 Avg Reward: 83.819181saving best model....\n", | |
"\n", | |
"Total T: 28849 Episode Num: 532 Reward: 68.233095 Avg Reward: 84.466645saving best model....\n", | |
"\n", | |
"Total T: 28920 Episode Num: 533 Reward: 70.993316 Avg Reward: 85.110843saving best model....\n", | |
"\n", | |
"Total T: 29107 Episode Num: 534 Reward: 108.478852 Avg Reward: 86.136762saving best model....\n", | |
"\n", | |
"Total T: 29253 Episode Num: 536 Reward: 77.353549 Avg Reward: 86.674760saving best model....\n", | |
"\n", | |
"Total T: 29641 Episode Num: 537 Reward: 174.417830 Avg Reward: 88.247766saving best model....\n", | |
"\n", | |
"Total T: 29757 Episode Num: 538 Reward: 95.540528 Avg Reward: 88.763552saving best model....\n", | |
"\n", | |
"Total T: 30360 Episode Num: 539 Reward: 367.916024 Avg Reward: 92.314258saving best model....\n", | |
"\n", | |
"Total T: 30465 Episode Num: 540 Reward: 72.477318 Avg Reward: 92.576066saving best model....\n", | |
"\n", | |
"Total T: 30534 Episode Num: 541 Reward: 57.214366 Avg Reward: 93.041586saving best model....\n", | |
"\n", | |
"Total T: 30702 Episode Num: 542 Reward: 132.262688 Avg Reward: 94.028824saving best model....\n", | |
"\n", | |
"Total T: 30822 Episode Num: 543 Reward: 96.035913 Avg Reward: 94.374318saving best model....\n", | |
"\n", | |
"Total T: 32701 Episode Num: 563 Reward: 173.171367 Avg Reward: 92.100857saving best model....\n", | |
"\n", | |
"Total T: 34059 Episode Num: 566 Reward: 90.331956 Avg Reward: 94.6459222saving best model....\n", | |
"\n", | |
"Total T: 35353 Episode Num: 568 Reward: 58.279942 Avg Reward: 96.7399686saving best model....\n", | |
"\n", | |
"Total T: 35695 Episode Num: 569 Reward: 162.680436 Avg Reward: 97.501977saving best model....\n", | |
"\n", | |
"Total T: 37328 Episode Num: 572 Reward: 161.161587 Avg Reward: 99.629027saving best model....\n", | |
"\n", | |
"Total T: 37720 Episode Num: 573 Reward: 204.134413 Avg Reward: 101.073828saving best model....\n", | |
"\n", | |
"Total T: 37932 Episode Num: 574 Reward: 165.014384 Avg Reward: 102.249841saving best model....\n", | |
"\n", | |
"Total T: 38323 Episode Num: 575 Reward: 196.480934 Avg Reward: 103.256492saving best model....\n", | |
"\n", | |
"Total T: 38478 Episode Num: 576 Reward: 108.871301 Avg Reward: 103.941694saving best model....\n", | |
"\n", | |
"Total T: 39478 Episode Num: 577 Reward: 278.518448 Avg Reward: 105.762396saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 39806 Episode Num: 578 Reward: 208.145489 Avg Reward: 105.773231saving best model....\n", | |
"\n", | |
"Total T: 40153 Episode Num: 579 Reward: 188.060161 Avg Reward: 106.239159saving best model....\n", | |
"\n", | |
"Total T: 41153 Episode Num: 580 Reward: 555.741063 Avg Reward: 110.771692saving best model....\n", | |
"\n", | |
"Total T: 42153 Episode Num: 581 Reward: 569.504707 Avg Reward: 115.972529saving best model....\n", | |
"\n", | |
"Total T: 46021 Episode Num: 592 Reward: 198.298114 Avg Reward: 116.316214saving best model....\n", | |
"\n", | |
"Total T: 46418 Episode Num: 593 Reward: 210.076767 Avg Reward: 117.462085saving best model....\n", | |
"\n", | |
"Total T: 46844 Episode Num: 594 Reward: 246.176531 Avg Reward: 119.406595saving best model....\n", | |
"\n", | |
"Total T: 47203 Episode Num: 595 Reward: 209.325910 Avg Reward: 120.992479saving best model....\n", | |
"\n", | |
"Total T: 47523 Episode Num: 596 Reward: 185.038532 Avg Reward: 122.203928saving best model....\n", | |
"\n", | |
"Total T: 47685 Episode Num: 597 Reward: 84.376518 Avg Reward: 122.501402saving best model....\n", | |
"\n", | |
"Total T: 48022 Episode Num: 598 Reward: 197.779652 Avg Reward: 123.807743saving best model....\n", | |
"\n", | |
"Total T: 48373 Episode Num: 599 Reward: 219.670611 Avg Reward: 125.124664saving best model....\n", | |
"\n", | |
"Total T: 48818 Episode Num: 600 Reward: 180.068262 Avg Reward: 125.637730saving best model....\n", | |
"\n", | |
"Total T: 49314 Episode Num: 601 Reward: 232.243027 Avg Reward: 127.367441saving best model....\n", | |
"\n", | |
"Total T: 49668 Episode Num: 602 Reward: 196.362185 Avg Reward: 128.773097saving best model....\n", | |
"\n", | |
"Total T: 50057 Episode Num: 604 Reward: 72.392047 Avg Reward: 128.827502saving best model....\n", | |
"\n", | |
"Total T: 50557 Episode Num: 605 Reward: 163.868441 Avg Reward: 129.844544saving best model....\n", | |
"\n", | |
"Total T: 50787 Episode Num: 606 Reward: 154.744852 Avg Reward: 130.659464saving best model....\n", | |
"\n", | |
"Total T: 51078 Episode Num: 607 Reward: 174.716875 Avg Reward: 131.897640saving best model....\n", | |
"\n", | |
"Total T: 51385 Episode Num: 608 Reward: 187.696860 Avg Reward: 133.279146saving best model....\n", | |
"\n", | |
"Total T: 51704 Episode Num: 609 Reward: 174.131747 Avg Reward: 134.236059saving best model....\n", | |
"\n", | |
"Total T: 51998 Episode Num: 610 Reward: 178.286926 Avg Reward: 135.291577saving best model....\n", | |
"\n", | |
"Total T: 52282 Episode Num: 611 Reward: 175.440940 Avg Reward: 136.289370saving best model....\n", | |
"\n", | |
"Total T: 52632 Episode Num: 612 Reward: 213.872092 Avg Reward: 137.640046saving best model....\n", | |
"\n", | |
"Total T: 52945 Episode Num: 613 Reward: 179.088805 Avg Reward: 138.628769saving best model....\n", | |
"\n", | |
"Total T: 53261 Episode Num: 614 Reward: 180.717978 Avg Reward: 139.967138saving best model....\n", | |
"\n", | |
"Total T: 53673 Episode Num: 615 Reward: 221.511281 Avg Reward: 140.720320saving best model....\n", | |
"\n", | |
"Total T: 54017 Episode Num: 616 Reward: 178.833783 Avg Reward: 141.788851saving best model....\n", | |
"\n", | |
"Total T: 54342 Episode Num: 617 Reward: 168.209854 Avg Reward: 142.844983saving best model....\n", | |
"\n", | |
"Total T: 54676 Episode Num: 618 Reward: 181.625722 Avg Reward: 143.984519saving best model....\n", | |
"\n", | |
"Total T: 55164 Episode Num: 621 Reward: 38.381799 Avg Reward: 144.5121325saving best model....\n", | |
"\n", | |
"Total T: 56164 Episode Num: 622 Reward: 574.840826 Avg Reward: 149.878888saving best model....\n", | |
"\n", | |
"Total T: 58011 Episode Num: 627 Reward: 123.568904 Avg Reward: 152.010689saving best model....\n", | |
"\n", | |
"Total T: 59316 Episode Num: 631 Reward: 75.566247 Avg Reward: 151.8540930saving best model....\n", | |
"\n", | |
"Total T: 59685 Episode Num: 632 Reward: 160.998369 Avg Reward: 152.781745saving best model....\n", | |
"\n", | |
"Total T: 60631 Episode Num: 635 Reward: 92.106910 Avg Reward: 153.2261577saving best model....\n", | |
"\n", | |
"Total T: 94602 Episode Num: 787 Reward: 88.837455 Avg Reward: 153.6093017saving best model....\n", | |
"\n", | |
"Total T: 95602 Episode Num: 788 Reward: 627.306957 Avg Reward: 160.170133saving best model....\n", | |
"\n", | |
"Total T: 95680 Episode Num: 789 Reward: 81.199510 Avg Reward: 161.038349saving best model....\n", | |
"\n", | |
"Total T: 96680 Episode Num: 790 Reward: 726.985955 Avg Reward: 168.351230saving best model....\n", | |
"\n", | |
"Total T: 97027 Episode Num: 791 Reward: 214.464171 Avg Reward: 170.476207saving best model....\n", | |
"\n", | |
"Total T: 97315 Episode Num: 792 Reward: 164.787312 Avg Reward: 172.166890saving best model....\n", | |
"\n", | |
"Total T: 98180 Episode Num: 793 Reward: 499.563703 Avg Reward: 177.180258saving best model....\n", | |
"\n", | |
"Total T: 99180 Episode Num: 794 Reward: 641.903577 Avg Reward: 183.565071saving best model....\n", | |
"\n", | |
"Total T: 100180 Episode Num: 795 Reward: 669.655467 Avg Reward: 190.319001saving best model....\n", | |
"\n", | |
"Total T: 101180 Episode Num: 796 Reward: 752.240968 Avg Reward: 197.920959saving best model....\n", | |
"\n", | |
"Total T: 102180 Episode Num: 797 Reward: 761.280812 Avg Reward: 205.481511saving best model....\n", | |
"\n", | |
"Total T: 103180 Episode Num: 798 Reward: 623.072607 Avg Reward: 211.651707saving best model....\n", | |
"\n", | |
"Total T: 104180 Episode Num: 799 Reward: 782.477656 Avg Reward: 219.570758saving best model....\n", | |
"\n", | |
"Total T: 105180 Episode Num: 800 Reward: 779.456492 Avg Reward: 227.488873saving best model....\n", | |
"\n", | |
"Total T: 106180 Episode Num: 801 Reward: 781.362885 Avg Reward: 235.347343saving best model....\n", | |
"\n", | |
"Total T: 107180 Episode Num: 802 Reward: 775.136123 Avg Reward: 243.149804saving best model....\n", | |
"\n", | |
"Total T: 108180 Episode Num: 803 Reward: 774.094066 Avg Reward: 250.954661saving best model....\n", | |
"\n", | |
"Total T: 108644 Episode Num: 804 Reward: 338.825863 Avg Reward: 254.420116saving best model....\n", | |
"\n", | |
"Total T: 109644 Episode Num: 805 Reward: 613.417764 Avg Reward: 260.636433saving best model....\n", | |
"\n", | |
"Total T: 110080 Episode Num: 806 Reward: 273.777772 Avg Reward: 263.413542saving best model....\n", | |
"\n", | |
"Total T: 111080 Episode Num: 807 Reward: 657.352827 Avg Reward: 270.006090saving best model....\n", | |
"\n", | |
"Total T: 111431 Episode Num: 808 Reward: 226.134011 Avg Reward: 272.280073saving best model....\n", | |
"\n", | |
"Total T: 112431 Episode Num: 809 Reward: 701.014874 Avg Reward: 279.296688saving best model....\n", | |
"\n", | |
"Total T: 113431 Episode Num: 810 Reward: 648.278442 Avg Reward: 285.836800saving best model....\n", | |
"\n", | |
"Total T: 114431 Episode Num: 811 Reward: 671.086582 Avg Reward: 292.577366saving best model....\n", | |
"\n", | |
"Total T: 115431 Episode Num: 812 Reward: 389.517077 Avg Reward: 296.344534saving best model....\n", | |
"\n", | |
"Total T: 116431 Episode Num: 813 Reward: 589.032364 Avg Reward: 302.165522saving best model....\n", | |
"\n", | |
"Total T: 117431 Episode Num: 814 Reward: 296.571765 Avg Reward: 305.048346saving best model....\n", | |
"\n", | |
"Total T: 118033 Episode Num: 815 Reward: 358.732093 Avg Reward: 308.633917saving best model....\n", | |
"\n", | |
"Total T: 119009 Episode Num: 816 Reward: 345.556066 Avg Reward: 312.157789saving best model....\n", | |
"\n", | |
"Total T: 120009 Episode Num: 817 Reward: 428.745061 Avg Reward: 316.325602saving best model....\n", | |
"\n", | |
"Total T: 120811 Episode Num: 818 Reward: 466.206787 Avg Reward: 320.972867saving best model....\n", | |
"\n", | |
"Total T: 121811 Episode Num: 819 Reward: 645.154980 Avg Reward: 327.371958saving best model....\n", | |
"\n", | |
"Total T: 122811 Episode Num: 820 Reward: 749.890390 Avg Reward: 334.831932saving best model....\n", | |
"\n", | |
"Total T: 123811 Episode Num: 821 Reward: 513.603701 Avg Reward: 339.952398saving best model....\n", | |
"\n", | |
"Total T: 124811 Episode Num: 822 Reward: 704.736128 Avg Reward: 347.004822saving best model....\n", | |
"\n", | |
"Total T: 125811 Episode Num: 823 Reward: 596.104607 Avg Reward: 353.004368saving best model....\n", | |
"\n", | |
"Total T: 126811 Episode Num: 824 Reward: 771.726789 Avg Reward: 360.779569saving best model....\n", | |
"\n", | |
"Total T: 127811 Episode Num: 825 Reward: 774.429527 Avg Reward: 368.485279saving best model....\n", | |
"\n", | |
"Total T: 128811 Episode Num: 826 Reward: 776.687635 Avg Reward: 376.129347saving best model....\n", | |
"\n", | |
"Total T: 129811 Episode Num: 827 Reward: 786.188823 Avg Reward: 383.649707saving best model....\n", | |
"\n", | |
"Total T: 130811 Episode Num: 828 Reward: 781.769788 Avg Reward: 391.324391saving best model....\n", | |
"\n", | |
"Total T: 131811 Episode Num: 829 Reward: 778.572348 Avg Reward: 397.996367saving best model....\n", | |
"\n", | |
"Total T: 132811 Episode Num: 830 Reward: 779.322882 Avg Reward: 404.029274saving best model....\n", | |
"\n", | |
"Total T: 133811 Episode Num: 831 Reward: 686.317689 Avg Reward: 410.692927saving best model....\n", | |
"\n", | |
"Total T: 134811 Episode Num: 832 Reward: 624.585270 Avg Reward: 416.750915saving best model....\n", | |
"\n", | |
"Total T: 135811 Episode Num: 833 Reward: 666.012935 Avg Reward: 423.179106saving best model....\n", | |
"\n", | |
"Total T: 136811 Episode Num: 834 Reward: 628.673884 Avg Reward: 429.218067saving best model....\n", | |
"\n", | |
"Total T: 137656 Episode Num: 835 Reward: 508.240021 Avg Reward: 434.041061saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 138656 Episode Num: 836 Reward: 607.065039 Avg Reward: 439.941842saving best model....\n", | |
"\n", | |
"Total T: 139656 Episode Num: 837 Reward: 595.689970 Avg Reward: 445.555158saving best model....\n", | |
"\n", | |
"Total T: 140656 Episode Num: 838 Reward: 732.384281 Avg Reward: 452.465407saving best model....\n", | |
"\n", | |
"Total T: 141531 Episode Num: 839 Reward: 647.656644 Avg Reward: 459.453589saving best model....\n", | |
"\n", | |
"Total T: 142531 Episode Num: 840 Reward: 756.008799 Avg Reward: 465.000707saving best model....\n", | |
"\n", | |
"Total T: 143531 Episode Num: 841 Reward: 695.013929 Avg Reward: 471.457913saving best model....\n", | |
"\n", | |
"Total T: 144531 Episode Num: 842 Reward: 768.923176 Avg Reward: 478.003507saving best model....\n", | |
"\n", | |
"Total T: 145531 Episode Num: 843 Reward: 742.911920 Avg Reward: 483.338175saving best model....\n", | |
"\n", | |
"Total T: 146531 Episode Num: 844 Reward: 762.849011 Avg Reward: 488.611679saving best model....\n", | |
"\n", | |
"Total T: 147531 Episode Num: 845 Reward: 759.828828 Avg Reward: 492.855337saving best model....\n", | |
"\n", | |
"Total T: 148531 Episode Num: 846 Reward: 760.423366 Avg Reward: 498.781393saving best model....\n", | |
"\n", | |
"Total T: 149531 Episode Num: 847 Reward: 693.903102 Avg Reward: 503.894419saving best model....\n", | |
"\n", | |
"Total T: 150531 Episode Num: 848 Reward: 690.529362 Avg Reward: 509.918776saving best model....\n", | |
"\n", | |
"Total T: 151531 Episode Num: 849 Reward: 768.121156 Avg Reward: 516.182598saving best model....\n", | |
"\n", | |
"Total T: 152531 Episode Num: 850 Reward: 768.642842 Avg Reward: 516.759832saving best model....\n", | |
"\n", | |
"Total T: 154531 Episode Num: 852 Reward: 697.973138 Avg Reward: 521.283181saving best model....\n", | |
"\n", | |
"Total T: 155531 Episode Num: 853 Reward: 665.384594 Avg Reward: 523.956814saving best model....\n", | |
"\n", | |
"Total T: 156531 Episode Num: 854 Reward: 762.398970 Avg Reward: 527.921202saving best model....\n", | |
"\n", | |
"Total T: 169213 Episode Num: 871 Reward: 346.901259 Avg Reward: 527.493650saving best model....\n", | |
"\n", | |
"Total T: 171176 Episode Num: 874 Reward: 69.619570 Avg Reward: 531.3691755saving best model....\n", | |
"\n", | |
"Total T: 172176 Episode Num: 875 Reward: 736.132934 Avg Reward: 537.978233saving best model....\n", | |
"\n", | |
"Total T: 173176 Episode Num: 876 Reward: 741.795397 Avg Reward: 544.165684saving best model....\n", | |
"\n", | |
"Total T: 174058 Episode Num: 878 Reward: 180.599964 Avg Reward: 542.819475saving best model....\n", | |
"\n", | |
"Total T: 175058 Episode Num: 879 Reward: 755.526016 Avg Reward: 549.465812saving best model....\n", | |
"\n", | |
"Total T: 176058 Episode Num: 880 Reward: 768.825127 Avg Reward: 555.038449saving best model....\n", | |
"\n", | |
"Total T: 177058 Episode Num: 881 Reward: 783.613216 Avg Reward: 561.374477saving best model....\n", | |
"\n", | |
"Total T: 178058 Episode Num: 882 Reward: 628.747135 Avg Reward: 566.602984saving best model....\n", | |
"\n", | |
"Total T: 179058 Episode Num: 883 Reward: 790.460362 Avg Reward: 573.112686saving best model....\n", | |
"\n", | |
"Total T: 180058 Episode Num: 884 Reward: 752.781964 Avg Reward: 573.258240saving best model....\n", | |
"\n", | |
"Total T: 181058 Episode Num: 885 Reward: 579.352368 Avg Reward: 578.358996saving best model....\n", | |
"\n", | |
"Total T: 182058 Episode Num: 886 Reward: 772.846145 Avg Reward: 585.130737saving best model....\n", | |
"\n", | |
"Total T: 183058 Episode Num: 887 Reward: 791.617293 Avg Reward: 592.158536saving best model....\n", | |
"\n", | |
"Total T: 184058 Episode Num: 888 Reward: 771.091110 Avg Reward: 593.596377saving best model....\n", | |
"\n", | |
"Total T: 186058 Episode Num: 890 Reward: 634.438210 Avg Reward: 599.561875saving best model....\n", | |
"\n", | |
"Total T: 187058 Episode Num: 891 Reward: 592.944176 Avg Reward: 603.346675saving best model....\n", | |
"\n", | |
"Total T: 188058 Episode Num: 892 Reward: 753.950534 Avg Reward: 609.238307saving best model....\n", | |
"\n", | |
"Total T: 189058 Episode Num: 893 Reward: 778.486372 Avg Reward: 612.027534saving best model....\n", | |
"\n", | |
"Total T: 190058 Episode Num: 894 Reward: 729.012056 Avg Reward: 612.898618saving best model....\n", | |
"\n", | |
"Total T: 200832 Episode Num: 905 Reward: 598.843994 Avg Reward: 612.364402saving best model....\n", | |
"\n", | |
"Total T: 207218 Episode Num: 913 Reward: 567.725593 Avg Reward: 611.981382saving best model....\n", | |
"\n", | |
"Total T: 208420 Episode Num: 915 Reward: 142.749539 Avg Reward: 614.458512saving best model....\n", | |
"\n", | |
"Total T: 209420 Episode Num: 916 Reward: 600.423960 Avg Reward: 617.007191saving best model....\n", | |
"\n", | |
"Total T: 210420 Episode Num: 917 Reward: 584.272372 Avg Reward: 618.562464saving best model....\n", | |
"\n", | |
"Total T: 211420 Episode Num: 918 Reward: 631.155074 Avg Reward: 620.211947saving best model....\n", | |
"\n", | |
"Total T: 212420 Episode Num: 919 Reward: 700.045365 Avg Reward: 620.760851saving best model....\n", | |
"\n", | |
"Total T: 213420 Episode Num: 920 Reward: 751.913776 Avg Reward: 620.781085saving best model....\n", | |
"\n", | |
"Total T: 215420 Episode Num: 922 Reward: 638.740870 Avg Reward: 621.963664saving best model....\n", | |
"\n", | |
"Total T: 395578 Episode Num: 1136 Reward: 649.601751 Avg Reward: 621.347656saving best model....\n", | |
"\n", | |
"Total T: 396578 Episode Num: 1137 Reward: 708.990441 Avg Reward: 627.158660saving best model....\n", | |
"\n", | |
"Total T: 397578 Episode Num: 1138 Reward: 644.682781 Avg Reward: 630.052805saving best model....\n", | |
"\n", | |
"Total T: 398578 Episode Num: 1139 Reward: 786.658749 Avg Reward: 632.597688saving best model....\n", | |
"\n", | |
"Total T: 399578 Episode Num: 1140 Reward: 867.085626 Avg Reward: 635.357976saving best model....\n", | |
"\n", | |
"Total T: 400578 Episode Num: 1141 Reward: 779.503252 Avg Reward: 641.919438saving best model....\n", | |
"\n", | |
"Total T: 401578 Episode Num: 1142 Reward: 756.239944 Avg Reward: 644.276590saving best model....\n", | |
"\n", | |
"Total T: 402578 Episode Num: 1143 Reward: 734.565986 Avg Reward: 646.428746saving best model....\n", | |
"\n", | |
"Total T: 403578 Episode Num: 1144 Reward: 777.622045 Avg Reward: 651.599341saving best model....\n", | |
"\n", | |
"Total T: 406578 Episode Num: 1147 Reward: 795.899634 Avg Reward: 654.058628saving best model....\n", | |
"\n", | |
"Total T: 407578 Episode Num: 1148 Reward: 864.715466 Avg Reward: 654.760812saving best model....\n", | |
"\n", | |
"Total T: 408578 Episode Num: 1149 Reward: 764.133676 Avg Reward: 656.128315saving best model....\n", | |
"\n", | |
"Total T: 409578 Episode Num: 1150 Reward: 722.064514 Avg Reward: 657.523010saving best model....\n", | |
"\n", | |
"Total T: 410578 Episode Num: 1151 Reward: 822.624245 Avg Reward: 659.905185saving best model....\n", | |
"\n", | |
"Total T: 411578 Episode Num: 1152 Reward: 728.414011 Avg Reward: 660.737572saving best model....\n", | |
"\n", | |
"Total T: 412578 Episode Num: 1153 Reward: 835.283055 Avg Reward: 662.395637saving best model....\n", | |
"\n", | |
"Total T: 413578 Episode Num: 1154 Reward: 817.009424 Avg Reward: 664.247630saving best model....\n", | |
"\n", | |
"Total T: 414578 Episode Num: 1155 Reward: 811.311305 Avg Reward: 665.597196saving best model....\n", | |
"\n", | |
"Total T: 415578 Episode Num: 1156 Reward: 786.708018 Avg Reward: 666.069587saving best model....\n", | |
"\n", | |
"Total T: 416578 Episode Num: 1157 Reward: 788.983852 Avg Reward: 666.645146saving best model....\n", | |
"\n", | |
"Total T: 417578 Episode Num: 1158 Reward: 661.522098 Avg Reward: 667.019939saving best model....\n", | |
"\n", | |
"Total T: 418578 Episode Num: 1159 Reward: 729.820605 Avg Reward: 668.409907saving best model....\n", | |
"\n", | |
"Total T: 430578 Episode Num: 1171 Reward: 971.458339 Avg Reward: 668.622259saving best model....\n", | |
"\n", | |
"Total T: 431578 Episode Num: 1172 Reward: 911.477856 Avg Reward: 670.595334saving best model....\n", | |
"\n", | |
"Total T: 432578 Episode Num: 1173 Reward: 966.078742 Avg Reward: 673.229036saving best model....\n", | |
"\n", | |
"Total T: 433578 Episode Num: 1174 Reward: 940.914691 Avg Reward: 674.836664saving best model....\n", | |
"\n", | |
"Total T: 434578 Episode Num: 1175 Reward: 820.999144 Avg Reward: 675.333570saving best model....\n", | |
"\n", | |
"Total T: 436578 Episode Num: 1177 Reward: 775.876824 Avg Reward: 676.101941saving best model....\n", | |
"\n", | |
"Total T: 437578 Episode Num: 1178 Reward: 852.367392 Avg Reward: 678.068884saving best model....\n", | |
"\n", | |
"Total T: 438578 Episode Num: 1179 Reward: 797.859466 Avg Reward: 684.013201saving best model....\n", | |
"\n", | |
"Total T: 439578 Episode Num: 1180 Reward: 792.535825 Avg Reward: 690.947694saving best model....\n", | |
"\n", | |
"Total T: 440578 Episode Num: 1181 Reward: 769.837102 Avg Reward: 697.649949saving best model....\n", | |
"\n", | |
"Total T: 441578 Episode Num: 1182 Reward: 766.926211 Avg Reward: 699.189786saving best model....\n", | |
"\n", | |
"Total T: 442578 Episode Num: 1183 Reward: 793.089289 Avg Reward: 705.492816saving best model....\n", | |
"\n", | |
"Total T: 443578 Episode Num: 1184 Reward: 827.894763 Avg Reward: 705.544051saving best model....\n", | |
"\n", | |
"Total T: 445578 Episode Num: 1186 Reward: 683.352853 Avg Reward: 706.873818saving best model....\n", | |
"\n", | |
"Total T: 446578 Episode Num: 1187 Reward: 1018.927876 Avg Reward: 710.924138saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 447578 Episode Num: 1188 Reward: 792.753844 Avg Reward: 711.915058saving best model....\n", | |
"\n", | |
"Total T: 450578 Episode Num: 1191 Reward: 693.729834 Avg Reward: 712.214693saving best model....\n", | |
"\n", | |
"Total T: 451578 Episode Num: 1192 Reward: 815.165723 Avg Reward: 714.397943saving best model....\n", | |
"\n", | |
"Total T: 452578 Episode Num: 1193 Reward: 855.351908 Avg Reward: 719.103338saving best model....\n", | |
"\n", | |
"Total T: 479242 Episode Num: 1225 Reward: 655.502862 Avg Reward: 715.526317saving best model....\n", | |
"\n", | |
"Total T: 480242 Episode Num: 1226 Reward: 792.296167 Avg Reward: 722.797637saving best model....\n", | |
"\n", | |
"Total T: 483242 Episode Num: 1229 Reward: 796.779098 Avg Reward: 724.130064saving best model....\n", | |
"\n", | |
"Total T: 484242 Episode Num: 1230 Reward: 848.743813 Avg Reward: 724.889503saving best model....\n", | |
"\n", | |
"Total T: 485242 Episode Num: 1231 Reward: 922.743257 Avg Reward: 727.974447saving best model....\n", | |
"\n", | |
"Total T: 488242 Episode Num: 1234 Reward: 826.778749 Avg Reward: 730.109409saving best model....\n", | |
"\n", | |
"Total T: 489242 Episode Num: 1235 Reward: 701.188947 Avg Reward: 730.239606saving best model....\n", | |
"\n", | |
"Total T: 490242 Episode Num: 1236 Reward: 816.181276 Avg Reward: 731.905401saving best model....\n", | |
"\n", | |
"Total T: 491242 Episode Num: 1237 Reward: 849.544791 Avg Reward: 733.310945saving best model....\n", | |
"\n", | |
"Total T: 492242 Episode Num: 1238 Reward: 870.492120 Avg Reward: 735.569038saving best model....\n", | |
"\n", | |
"Total T: 493242 Episode Num: 1239 Reward: 867.974516 Avg Reward: 736.382196saving best model....\n", | |
"\n", | |
"Total T: 512242 Episode Num: 1258 Reward: 752.203607 Avg Reward: 736.945031saving best model....\n", | |
"\n", | |
"Total T: 513242 Episode Num: 1259 Reward: 815.165514 Avg Reward: 737.798480saving best model....\n", | |
"\n", | |
"Total T: 514242 Episode Num: 1260 Reward: 852.299877 Avg Reward: 738.815068saving best model....\n", | |
"\n", | |
"Total T: 515242 Episode Num: 1261 Reward: 819.679222 Avg Reward: 739.418269saving best model....\n", | |
"\n", | |
"Total T: 518242 Episode Num: 1264 Reward: 801.780432 Avg Reward: 740.006429saving best model....\n", | |
"\n", | |
"Total T: 519242 Episode Num: 1265 Reward: 926.175743 Avg Reward: 741.087535saving best model....\n", | |
"\n", | |
"Total T: 520242 Episode Num: 1266 Reward: 837.876381 Avg Reward: 742.443381saving best model....\n", | |
"\n", | |
"Total T: 521242 Episode Num: 1267 Reward: 827.456550 Avg Reward: 743.112672saving best model....\n", | |
"\n", | |
"Total T: 522242 Episode Num: 1268 Reward: 963.646703 Avg Reward: 743.741782saving best model....\n", | |
"\n", | |
"Total T: 543242 Episode Num: 1289 Reward: 922.867578 Avg Reward: 741.6915411saving best model....\n", | |
"\n", | |
"Total T: 544242 Episode Num: 1290 Reward: 1057.938789 Avg Reward: 746.300036saving best model....\n", | |
"\n", | |
"Total T: 545242 Episode Num: 1291 Reward: 1037.058065 Avg Reward: 749.733319saving best model....\n", | |
"\n", | |
"Total T: 546242 Episode Num: 1292 Reward: 1041.488839 Avg Reward: 751.996550saving best model....\n", | |
"\n", | |
"Total T: 547242 Episode Num: 1293 Reward: 1074.842421 Avg Reward: 754.191455saving best model....\n", | |
"\n", | |
"Total T: 548242 Episode Num: 1294 Reward: 932.343952 Avg Reward: 754.299039saving best model....\n", | |
"\n", | |
"Total T: 550005 Episode Num: 1296 Reward: 921.352108 Avg Reward: 760.605109saving best model....\n", | |
"\n", | |
"Total T: 551005 Episode Num: 1297 Reward: 952.664393 Avg Reward: 764.054141saving best model....\n", | |
"\n", | |
"Total T: 552005 Episode Num: 1298 Reward: 937.362327 Avg Reward: 765.729038saving best model....\n", | |
"\n", | |
"Total T: 553005 Episode Num: 1299 Reward: 1090.911131 Avg Reward: 772.431448saving best model....\n", | |
"\n", | |
"Total T: 554005 Episode Num: 1300 Reward: 1135.958969 Avg Reward: 781.455767saving best model....\n", | |
"\n", | |
"Total T: 555005 Episode Num: 1301 Reward: 1121.910155 Avg Reward: 792.578165saving best model....\n", | |
"\n", | |
"Total T: 556005 Episode Num: 1302 Reward: 1108.084628 Avg Reward: 800.977057saving best model....\n", | |
"\n", | |
"Total T: 557005 Episode Num: 1303 Reward: 1082.758387 Avg Reward: 811.729111saving best model....\n", | |
"\n", | |
"Total T: 558005 Episode Num: 1304 Reward: 1152.565287 Avg Reward: 823.142004saving best model....\n", | |
"\n", | |
"Total T: 559005 Episode Num: 1305 Reward: 1127.445844 Avg Reward: 830.929281saving best model....\n", | |
"\n", | |
"Total T: 560005 Episode Num: 1306 Reward: 1119.569741 Avg Reward: 840.932692saving best model....\n", | |
"\n", | |
"Total T: 561005 Episode Num: 1307 Reward: 1029.479529 Avg Reward: 850.394571saving best model....\n", | |
"\n", | |
"Total T: 562005 Episode Num: 1308 Reward: 1262.851958 Avg Reward: 855.328429saving best model....\n", | |
"\n", | |
"Total T: 563005 Episode Num: 1309 Reward: 1140.687564 Avg Reward: 859.220277saving best model....\n", | |
"\n", | |
"Total T: 564005 Episode Num: 1310 Reward: 1021.642922 Avg Reward: 863.302316saving best model....\n", | |
"\n", | |
"Total T: 565005 Episode Num: 1311 Reward: 993.327258 Avg Reward: 869.309899saving best model....\n", | |
"\n", | |
"Total T: 586005 Episode Num: 1332 Reward: 1034.832447 Avg Reward: 869.549966saving best model....\n", | |
"\n", | |
"Total T: 587005 Episode Num: 1333 Reward: 902.167605 Avg Reward: 870.772481saving best model....\n", | |
"\n", | |
"Total T: 588005 Episode Num: 1334 Reward: 931.154713 Avg Reward: 871.816241saving best model....\n", | |
"\n", | |
"Total T: 589005 Episode Num: 1335 Reward: 1137.122761 Avg Reward: 876.175579saving best model....\n", | |
"\n", | |
"Total T: 590005 Episode Num: 1336 Reward: 1036.170513 Avg Reward: 878.375471saving best model....\n", | |
"\n", | |
"Total T: 591005 Episode Num: 1337 Reward: 1164.927283 Avg Reward: 881.529296saving best model....\n", | |
"\n", | |
"Total T: 592005 Episode Num: 1338 Reward: 1121.567905 Avg Reward: 884.040054saving best model....\n", | |
"\n", | |
"Total T: 593005 Episode Num: 1339 Reward: 998.596896 Avg Reward: 885.346278saving best model....\n", | |
"\n", | |
"Total T: 594005 Episode Num: 1340 Reward: 1076.602315 Avg Reward: 886.457646saving best model....\n", | |
"\n", | |
"Total T: 595005 Episode Num: 1341 Reward: 1077.224323 Avg Reward: 889.750229saving best model....\n", | |
"\n", | |
"Total T: 596005 Episode Num: 1342 Reward: 1169.986189 Avg Reward: 895.944625saving best model....\n", | |
"\n", | |
"Total T: 597005 Episode Num: 1343 Reward: 846.093629 Avg Reward: 898.442255saving best model....\n", | |
"\n", | |
"Total T: 598005 Episode Num: 1344 Reward: 912.140225 Avg Reward: 900.870517saving best model....\n", | |
"\n", | |
"Total T: 599005 Episode Num: 1345 Reward: 998.977418 Avg Reward: 903.435651saving best model....\n", | |
"\n", | |
"Total T: 600005 Episode Num: 1346 Reward: 957.810337 Avg Reward: 906.599519saving best model....\n", | |
"\n", | |
"Total T: 601005 Episode Num: 1347 Reward: 1045.452156 Avg Reward: 910.834408saving best model....\n", | |
"\n", | |
"Total T: 602005 Episode Num: 1348 Reward: 1006.705847 Avg Reward: 912.641678saving best model....\n", | |
"\n", | |
"Total T: 603005 Episode Num: 1349 Reward: 1088.235410 Avg Reward: 914.639132saving best model....\n", | |
"\n", | |
"Total T: 604005 Episode Num: 1350 Reward: 1133.837508 Avg Reward: 916.304814saving best model....\n", | |
"\n", | |
"Total T: 605005 Episode Num: 1351 Reward: 1101.319154 Avg Reward: 918.642305saving best model....\n", | |
"\n", | |
"Total T: 606005 Episode Num: 1352 Reward: 1164.174945 Avg Reward: 921.339311saving best model....\n", | |
"\n", | |
"Total T: 607005 Episode Num: 1353 Reward: 1236.755023 Avg Reward: 925.942720saving best model....\n", | |
"\n", | |
"Total T: 608005 Episode Num: 1354 Reward: 1164.652817 Avg Reward: 929.599665saving best model....\n", | |
"\n", | |
"Total T: 609005 Episode Num: 1355 Reward: 1165.684755 Avg Reward: 933.718925saving best model....\n", | |
"\n", | |
"Total T: 610005 Episode Num: 1356 Reward: 1054.454487 Avg Reward: 936.074275saving best model....\n", | |
"\n", | |
"Total T: 611005 Episode Num: 1357 Reward: 1115.596398 Avg Reward: 938.943349saving best model....\n", | |
"\n", | |
"Total T: 612005 Episode Num: 1358 Reward: 1090.608486 Avg Reward: 942.327398saving best model....\n", | |
"\n", | |
"Total T: 613005 Episode Num: 1359 Reward: 1070.331658 Avg Reward: 944.879060saving best model....\n", | |
"\n", | |
"Total T: 614005 Episode Num: 1360 Reward: 1142.584467 Avg Reward: 947.781905saving best model....\n", | |
"\n", | |
"Total T: 615005 Episode Num: 1361 Reward: 1187.418647 Avg Reward: 951.459300saving best model....\n", | |
"\n", | |
"Total T: 616005 Episode Num: 1362 Reward: 918.100151 Avg Reward: 953.086016saving best model....\n", | |
"\n", | |
"Total T: 617005 Episode Num: 1363 Reward: 1222.887686 Avg Reward: 958.311620saving best model....\n", | |
"\n", | |
"Total T: 618005 Episode Num: 1364 Reward: 1011.881800 Avg Reward: 960.412634saving best model....\n", | |
"\n", | |
"Total T: 619005 Episode Num: 1365 Reward: 1050.040623 Avg Reward: 961.651283saving best model....\n", | |
"\n", | |
"Total T: 620005 Episode Num: 1366 Reward: 989.660661 Avg Reward: 963.169126saving best model....\n", | |
"\n", | |
"Total T: 621005 Episode Num: 1367 Reward: 1131.981404 Avg Reward: 966.214374saving best model....\n", | |
"\n", | |
"Total T: 622005 Episode Num: 1368 Reward: 1058.738071 Avg Reward: 967.165288saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 623005 Episode Num: 1369 Reward: 1187.826921 Avg Reward: 969.779982saving best model....\n", | |
"\n", | |
"Total T: 624005 Episode Num: 1370 Reward: 1163.409350 Avg Reward: 973.381144saving best model....\n", | |
"\n", | |
"Total T: 625005 Episode Num: 1371 Reward: 918.428779 Avg Reward: 975.207919saving best model....\n", | |
"\n", | |
"Total T: 626005 Episode Num: 1372 Reward: 1244.181364 Avg Reward: 980.535153saving best model....\n", | |
"\n", | |
"Total T: 627005 Episode Num: 1373 Reward: 1105.513651 Avg Reward: 985.010907saving best model....\n", | |
"\n", | |
"Total T: 628005 Episode Num: 1374 Reward: 1131.012411 Avg Reward: 988.240308saving best model....\n", | |
"\n", | |
"Total T: 728909 Episode Num: 1484 Reward: 1428.454555 Avg Reward: 981.490534saving best model....\n", | |
"\n", | |
"Total T: 729909 Episode Num: 1485 Reward: 1434.856087 Avg Reward: 993.059802saving best model....\n", | |
"\n", | |
"Total T: 730909 Episode Num: 1486 Reward: 1137.569908 Avg Reward: 999.046120saving best model....\n", | |
"\n", | |
"Total T: 731909 Episode Num: 1487 Reward: 1443.342884 Avg Reward: 1009.637547saving best model....\n", | |
"\n", | |
"Total T: 732909 Episode Num: 1488 Reward: 954.222501 Avg Reward: 1014.775019saving best model....\n", | |
"\n", | |
"Total T: 733909 Episode Num: 1489 Reward: 1313.226783 Avg Reward: 1025.283603saving best model....\n", | |
"\n", | |
"Total T: 734909 Episode Num: 1490 Reward: 1519.503919 Avg Reward: 1040.227172saving best model....\n", | |
"\n", | |
"Total T: 736114 Episode Num: 1492 Reward: 254.569992 Avg Reward: 1049.2804163saving best model....\n", | |
"\n", | |
"Total T: 737114 Episode Num: 1493 Reward: 1504.772089 Avg Reward: 1060.317251saving best model....\n", | |
"\n", | |
"Total T: 738114 Episode Num: 1494 Reward: 1394.658359 Avg Reward: 1067.194087saving best model....\n", | |
"\n", | |
"Total T: 739114 Episode Num: 1495 Reward: 1463.194232 Avg Reward: 1074.199219saving best model....\n", | |
"\n", | |
"Total T: 740114 Episode Num: 1496 Reward: 1400.722019 Avg Reward: 1080.502140saving best model....\n", | |
"\n", | |
"Total T: 740954 Episode Num: 1497 Reward: 1190.432086 Avg Reward: 1084.683157saving best model....\n", | |
"\n", | |
"Total T: 743257 Episode Num: 1501 Reward: 1200.082435 Avg Reward: 1089.859694saving best model....\n", | |
"\n", | |
"Total T: 744257 Episode Num: 1502 Reward: 1458.376261 Avg Reward: 1100.004892saving best model....\n", | |
"\n", | |
"Total T: 745257 Episode Num: 1503 Reward: 1439.925858 Avg Reward: 1106.625293saving best model....\n", | |
"\n", | |
"Total T: 746257 Episode Num: 1504 Reward: 1556.926879 Avg Reward: 1115.691109saving best model....\n", | |
"\n", | |
"Total T: 747257 Episode Num: 1505 Reward: 1439.282788 Avg Reward: 1122.832899saving best model....\n", | |
"\n", | |
"Total T: 748257 Episode Num: 1506 Reward: 1369.395746 Avg Reward: 1129.501767saving best model....\n", | |
"\n", | |
"Total T: 749257 Episode Num: 1507 Reward: 1254.555832 Avg Reward: 1135.847085saving best model....\n", | |
"\n", | |
"Total T: 750257 Episode Num: 1508 Reward: 1373.900762 Avg Reward: 1139.422490saving best model....\n", | |
"\n", | |
"Total T: 751257 Episode Num: 1509 Reward: 1560.255871 Avg Reward: 1145.043701saving best model....\n", | |
"\n", | |
"Total T: 752257 Episode Num: 1510 Reward: 1385.998813 Avg Reward: 1149.252273saving best model....\n", | |
"\n", | |
"Total T: 767711 Episode Num: 1529 Reward: 1433.754504 Avg Reward: 1148.444785saving best model....\n", | |
"\n", | |
"Total T: 768711 Episode Num: 1530 Reward: 1528.218003 Avg Reward: 1157.587046saving best model....\n", | |
"\n", | |
"Total T: 776269 Episode Num: 1539 Reward: 1375.600956 Avg Reward: 1161.774089saving best model....\n", | |
"\n", | |
"Total T: 777269 Episode Num: 1540 Reward: 1567.416917 Avg Reward: 1164.238443saving best model....\n", | |
"\n", | |
"Total T: 778269 Episode Num: 1541 Reward: 1336.661864 Avg Reward: 1167.435113saving best model....\n", | |
"\n", | |
"Total T: 778464 Episode Num: 1542 Reward: 211.609520 Avg Reward: 1168.458280saving best model....\n", | |
"\n", | |
"Total T: 779464 Episode Num: 1543 Reward: 1462.497009 Avg Reward: 1171.463078saving best model....\n", | |
"\n", | |
"Total T: 780464 Episode Num: 1544 Reward: 1463.339541 Avg Reward: 1173.436844saving best model....\n", | |
"\n", | |
"Total T: 781464 Episode Num: 1545 Reward: 1401.733391 Avg Reward: 1176.629315saving best model....\n", | |
"\n", | |
"Total T: 782464 Episode Num: 1546 Reward: 1476.251947 Avg Reward: 1178.635507saving best model....\n", | |
"\n", | |
"Total T: 786505 Episode Num: 1551 Reward: 1464.648327 Avg Reward: 1177.506992saving best model....\n", | |
"\n", | |
"Total T: 787505 Episode Num: 1552 Reward: 1532.297859 Avg Reward: 1181.621506saving best model....\n", | |
"\n", | |
"Total T: 788505 Episode Num: 1553 Reward: 1538.141153 Avg Reward: 1185.963275saving best model....\n", | |
"\n", | |
"Total T: 802213 Episode Num: 1571 Reward: 1580.411465 Avg Reward: 1185.835794saving best model....\n", | |
"\n", | |
"Total T: 807828 Episode Num: 1578 Reward: 1593.818597 Avg Reward: 1187.904938saving best model....\n", | |
"\n", | |
"Total T: 815024 Episode Num: 1587 Reward: 1567.160422 Avg Reward: 1193.069525saving best model....\n", | |
"\n", | |
"Total T: 816024 Episode Num: 1588 Reward: 1529.357766 Avg Reward: 1198.820877saving best model....\n", | |
"\n", | |
"Total T: 817024 Episode Num: 1589 Reward: 1649.595876 Avg Reward: 1202.184568saving best model....\n", | |
"\n", | |
"Total T: 818024 Episode Num: 1590 Reward: 1709.176049 Avg Reward: 1204.081290saving best model....\n", | |
"\n", | |
"Total T: 819024 Episode Num: 1591 Reward: 1634.154944 Avg Reward: 1207.236369saving best model....\n", | |
"\n", | |
"Total T: 821024 Episode Num: 1593 Reward: 1452.935746 Avg Reward: 1218.526549saving best model....\n", | |
"\n", | |
"Total T: 822024 Episode Num: 1594 Reward: 1628.610742 Avg Reward: 1220.866073saving best model....\n", | |
"\n", | |
"Total T: 823024 Episode Num: 1595 Reward: 1566.626819 Avg Reward: 1221.900399saving best model....\n", | |
"\n", | |
"Total T: 824024 Episode Num: 1596 Reward: 1626.049313 Avg Reward: 1224.153672saving best model....\n", | |
"\n", | |
"Total T: 825334 Episode Num: 1599 Reward: 69.270429 Avg Reward: 1215.29225912saving best model....\n", | |
"\n", | |
"Total T: 827078 Episode Num: 1601 Reward: 1099.612406 Avg Reward: 1228.749391saving best model....\n", | |
"\n", | |
"Total T: 828078 Episode Num: 1602 Reward: 1607.307436 Avg Reward: 1230.238703saving best model....\n", | |
"\n", | |
"Total T: 829078 Episode Num: 1603 Reward: 1683.023873 Avg Reward: 1232.669683saving best model....\n", | |
"\n", | |
"Total T: 830078 Episode Num: 1604 Reward: 1598.905086 Avg Reward: 1233.089465saving best model....\n", | |
"\n", | |
"Total T: 831078 Episode Num: 1605 Reward: 1740.011000 Avg Reward: 1236.096747saving best model....\n", | |
"\n", | |
"Total T: 832078 Episode Num: 1606 Reward: 1659.521818 Avg Reward: 1238.998008saving best model....\n", | |
"\n", | |
"Total T: 833078 Episode Num: 1607 Reward: 1617.895272 Avg Reward: 1242.631403saving best model....\n", | |
"\n", | |
"Total T: 834078 Episode Num: 1608 Reward: 1885.542293 Avg Reward: 1247.747818saving best model....\n", | |
"\n", | |
"Total T: 836172 Episode Num: 1611 Reward: 1746.152163 Avg Reward: 1242.919017saving best model....\n", | |
"\n", | |
"Total T: 837172 Episode Num: 1612 Reward: 1843.710465 Avg Reward: 1260.239522saving best model....\n", | |
"\n", | |
"Total T: 838172 Episode Num: 1613 Reward: 1662.295215 Avg Reward: 1271.448373saving best model....\n", | |
"\n", | |
"Total T: 839172 Episode Num: 1614 Reward: 1715.570324 Avg Reward: 1287.682170saving best model....\n", | |
"\n", | |
"Total T: 840172 Episode Num: 1615 Reward: 1730.398079 Avg Reward: 1291.073057saving best model....\n", | |
"\n", | |
"Total T: 843362 Episode Num: 1619 Reward: 1710.500317 Avg Reward: 1297.436787saving best model....\n", | |
"\n", | |
"Total T: 844362 Episode Num: 1620 Reward: 1929.621518 Avg Reward: 1302.610391saving best model....\n", | |
"\n", | |
"Total T: 846703 Episode Num: 1624 Reward: 1874.738517 Avg Reward: 1301.969080saving best model....\n", | |
"\n", | |
"Total T: 847703 Episode Num: 1625 Reward: 1612.327840 Avg Reward: 1307.380700saving best model....\n", | |
"\n", | |
"Total T: 943968 Episode Num: 1785 Reward: 1827.621067 Avg Reward: 1300.053038saving best model....\n", | |
"\n", | |
"Total T: 944968 Episode Num: 1786 Reward: 1925.877309 Avg Reward: 1319.447488saving best model....\n", | |
"\n", | |
"Total T: 945968 Episode Num: 1787 Reward: 1815.236786 Avg Reward: 1337.795314saving best model....\n", | |
"\n", | |
"Total T: 946968 Episode Num: 1788 Reward: 1880.512984 Avg Reward: 1356.805697saving best model....\n", | |
"\n", | |
"Total T: 947968 Episode Num: 1789 Reward: 1536.977529 Avg Reward: 1375.307590saving best model....\n", | |
"\n", | |
"Total T: 948968 Episode Num: 1790 Reward: 1931.554997 Avg Reward: 1394.704101saving best model....\n", | |
"\n", | |
"Total T: 949968 Episode Num: 1791 Reward: 1788.989560 Avg Reward: 1412.678762saving best model....\n", | |
"\n", | |
"Total T: 950968 Episode Num: 1792 Reward: 1867.520271 Avg Reward: 1428.788225saving best model....\n", | |
"\n", | |
"Total T: 951968 Episode Num: 1793 Reward: 1892.033568 Avg Reward: 1447.770016saving best model....\n", | |
"\n", | |
"Total T: 952968 Episode Num: 1794 Reward: 1746.062162 Avg Reward: 1460.364522saving best model....\n", | |
"\n", | |
"Total T: 953968 Episode Num: 1795 Reward: 1936.241493 Avg Reward: 1473.498867saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 955102 Episode Num: 1797 Reward: 133.835966 Avg Reward: 1482.105524saving best model....\n", | |
"\n", | |
"Total T: 956102 Episode Num: 1798 Reward: 2013.254597 Avg Reward: 1496.234026saving best model....\n", | |
"\n", | |
"Total T: 957102 Episode Num: 1799 Reward: 1892.994992 Avg Reward: 1511.288989saving best model....\n", | |
"\n", | |
"Total T: 958102 Episode Num: 1800 Reward: 1969.281886 Avg Reward: 1523.843636saving best model....\n", | |
"\n", | |
"Total T: 959102 Episode Num: 1801 Reward: 1950.789872 Avg Reward: 1536.192049saving best model....\n", | |
"\n", | |
"Total T: 960102 Episode Num: 1802 Reward: 1842.398237 Avg Reward: 1552.870527saving best model....\n", | |
"\n", | |
"Total T: 961102 Episode Num: 1803 Reward: 2010.537885 Avg Reward: 1564.873240saving best model....\n", | |
"\n", | |
"Total T: 962102 Episode Num: 1804 Reward: 2031.673085 Avg Reward: 1577.184046saving best model....\n", | |
"\n", | |
"Total T: 963102 Episode Num: 1805 Reward: 1479.962049 Avg Reward: 1584.201529saving best model....\n", | |
"\n", | |
"Total T: 964102 Episode Num: 1806 Reward: 1903.268789 Avg Reward: 1591.911864saving best model....\n", | |
"\n", | |
"Total T: 967929 Episode Num: 1812 Reward: 1783.964173 Avg Reward: 1598.642516saving best model....\n", | |
"\n", | |
"Total T: 968929 Episode Num: 1813 Reward: 2036.052431 Avg Reward: 1607.942555saving best model....\n", | |
"\n", | |
"Total T: 969929 Episode Num: 1814 Reward: 1525.355240 Avg Reward: 1613.799337saving best model....\n", | |
"\n", | |
"Total T: 970929 Episode Num: 1815 Reward: 1645.239215 Avg Reward: 1615.935759saving best model....\n", | |
"\n", | |
"Total T: 971929 Episode Num: 1816 Reward: 2015.619890 Avg Reward: 1619.912874saving best model....\n", | |
"\n", | |
"Total T: 972929 Episode Num: 1817 Reward: 1873.721952 Avg Reward: 1623.680393saving best model....\n", | |
"\n", | |
"Total T: 975301 Episode Num: 1820 Reward: 1927.221463 Avg Reward: 1618.036414saving best model....\n", | |
"\n", | |
"Total T: 976301 Episode Num: 1821 Reward: 1856.994684 Avg Reward: 1626.626127saving best model....\n", | |
"\n", | |
"Total T: 977301 Episode Num: 1822 Reward: 1872.479950 Avg Reward: 1642.328959saving best model....\n", | |
"\n", | |
"Total T: 978301 Episode Num: 1823 Reward: 1985.637439 Avg Reward: 1660.829056saving best model....\n", | |
"\n", | |
"Total T: 979301 Episode Num: 1824 Reward: 1882.712502 Avg Reward: 1664.755714saving best model....\n", | |
"\n", | |
"Total T: 980071 Episode Num: 1825 Reward: 1528.557868 Avg Reward: 1675.992482saving best model....\n", | |
"\n", | |
"Total T: 981071 Episode Num: 1826 Reward: 1950.513837 Avg Reward: 1684.273888saving best model....\n", | |
"\n", | |
"Total T: 982071 Episode Num: 1827 Reward: 1928.851329 Avg Reward: 1686.083388saving best model....\n", | |
"\n", | |
"Total T: 983071 Episode Num: 1828 Reward: 1884.049862 Avg Reward: 1687.431929saving best model....\n", | |
"\n", | |
"Total T: 984071 Episode Num: 1829 Reward: 1917.209568 Avg Reward: 1690.729429saving best model....\n", | |
"\n", | |
"Total T: 985071 Episode Num: 1830 Reward: 1908.401949 Avg Reward: 1694.573996saving best model....\n", | |
"\n", | |
"Total T: 986071 Episode Num: 1831 Reward: 2018.054154 Avg Reward: 1700.164403saving best model....\n", | |
"\n", | |
"Total T: 988071 Episode Num: 1833 Reward: 1716.781755 Avg Reward: 1700.977108saving best model....\n", | |
"\n", | |
"Total T: 989071 Episode Num: 1834 Reward: 1735.771069 Avg Reward: 1703.870580saving best model....\n", | |
"\n", | |
"Total T: 990071 Episode Num: 1835 Reward: 1937.821126 Avg Reward: 1706.492930saving best model....\n", | |
"\n", | |
"Total T: 1008126 Episode Num: 1856 Reward: 162.335238 Avg Reward: 1697.9141416saving best model....\n", | |
"\n", | |
"Total T: 1010045 Episode Num: 1858 Reward: 1683.987101 Avg Reward: 1713.688646saving best model....\n", | |
"\n", | |
"Total T: 1011045 Episode Num: 1859 Reward: 1899.892905 Avg Reward: 1715.825510saving best model....\n", | |
"\n", | |
"Total T: 1012045 Episode Num: 1860 Reward: 1927.730235 Avg Reward: 1716.912025saving best model....\n", | |
"\n", | |
"Total T: 1046356 Episode Num: 1896 Reward: 2056.256715 Avg Reward: 1701.355758saving best model....\n", | |
"\n", | |
"Total T: 1103394 Episode Num: 1961 Reward: 2142.402567 Avg Reward: 1709.062279saving best model....\n", | |
"\n", | |
"Total T: 1104100 Episode Num: 1962 Reward: 1423.517726 Avg Reward: 1720.439349saving best model....\n", | |
"\n", | |
"Total T: 1117663 Episode Num: 1976 Reward: 2052.184967 Avg Reward: 1720.660141saving best model....\n", | |
"\n", | |
"Total T: 1120663 Episode Num: 1979 Reward: 2000.589526 Avg Reward: 1721.762381saving best model....\n", | |
"\n", | |
"Total T: 1121663 Episode Num: 1980 Reward: 2116.956713 Avg Reward: 1724.367843saving best model....\n", | |
"\n", | |
"Total T: 1122663 Episode Num: 1981 Reward: 2185.936063 Avg Reward: 1728.441790saving best model....\n", | |
"\n", | |
"Total T: 1123663 Episode Num: 1982 Reward: 2116.577776 Avg Reward: 1731.218534saving best model....\n", | |
"\n", | |
"Total T: 1124663 Episode Num: 1983 Reward: 2217.289103 Avg Reward: 1735.182137saving best model....\n", | |
"\n", | |
"Total T: 1125663 Episode Num: 1984 Reward: 2212.460464 Avg Reward: 1737.764281saving best model....\n", | |
"\n", | |
"Total T: 1126663 Episode Num: 1985 Reward: 2094.186390 Avg Reward: 1741.068408saving best model....\n", | |
"\n", | |
"Total T: 1131763 Episode Num: 1991 Reward: 719.274058 Avg Reward: 1730.2249504saving best model....\n", | |
"\n", | |
"Total T: 1132763 Episode Num: 1992 Reward: 2059.926626 Avg Reward: 1749.475737saving best model....\n", | |
"\n", | |
"Total T: 1134763 Episode Num: 1994 Reward: 2005.944271 Avg Reward: 1752.050513saving best model....\n", | |
"\n", | |
"Total T: 1135763 Episode Num: 1995 Reward: 2163.437762 Avg Reward: 1755.731738saving best model....\n", | |
"\n", | |
"Total T: 1136763 Episode Num: 1996 Reward: 2133.015577 Avg Reward: 1756.499327saving best model....\n", | |
"\n", | |
"Total T: 1137763 Episode Num: 1997 Reward: 2098.084008 Avg Reward: 1757.933058saving best model....\n", | |
"\n", | |
"Total T: 1138763 Episode Num: 1998 Reward: 2184.859372 Avg Reward: 1776.603584saving best model....\n", | |
"\n", | |
"Total T: 1139763 Episode Num: 1999 Reward: 2159.819747 Avg Reward: 1776.975249saving best model....\n", | |
"\n", | |
"Total T: 1140763 Episode Num: 2000 Reward: 2084.308219 Avg Reward: 1777.999119saving best model....\n", | |
"\n", | |
"Total T: 1141763 Episode Num: 2001 Reward: 2032.529747 Avg Reward: 1778.170707saving best model....\n", | |
"\n", | |
"Total T: 1142763 Episode Num: 2002 Reward: 1903.058610 Avg Reward: 1797.093604saving best model....\n", | |
"\n", | |
"Total T: 1143763 Episode Num: 2003 Reward: 1981.396048 Avg Reward: 1816.749969saving best model....\n", | |
"\n", | |
"Total T: 1144763 Episode Num: 2004 Reward: 2085.454800 Avg Reward: 1818.319394saving best model....\n", | |
"\n", | |
"Total T: 1146763 Episode Num: 2006 Reward: 937.491220 Avg Reward: 1823.3627625saving best model....\n", | |
"\n", | |
"Total T: 1147763 Episode Num: 2007 Reward: 1985.179108 Avg Reward: 1824.447361saving best model....\n", | |
"\n", | |
"Total T: 1148763 Episode Num: 2008 Reward: 1904.669541 Avg Reward: 1826.739413saving best model....\n", | |
"\n", | |
"Total T: 1149763 Episode Num: 2009 Reward: 2078.590195 Avg Reward: 1847.364731saving best model....\n", | |
"\n", | |
"Total T: 1151763 Episode Num: 2011 Reward: 1824.806017 Avg Reward: 1867.787146saving best model....\n", | |
"\n", | |
"Total T: 1152763 Episode Num: 2012 Reward: 2138.167960 Avg Reward: 1871.408806saving best model....\n", | |
"\n", | |
"Total T: 1153763 Episode Num: 2013 Reward: 1969.463616 Avg Reward: 1872.331094saving best model....\n", | |
"\n", | |
"Total T: 1154763 Episode Num: 2014 Reward: 2154.504187 Avg Reward: 1874.285120saving best model....\n", | |
"\n", | |
"Total T: 1155763 Episode Num: 2015 Reward: 2160.355579 Avg Reward: 1876.028189saving best model....\n", | |
"\n", | |
"Total T: 1156763 Episode Num: 2016 Reward: 1951.743862 Avg Reward: 1877.050552saving best model....\n", | |
"\n", | |
"Total T: 1158763 Episode Num: 2018 Reward: 2044.493289 Avg Reward: 1878.960390saving best model....\n", | |
"\n", | |
"Total T: 1159763 Episode Num: 2019 Reward: 2034.834059 Avg Reward: 1880.888255saving best model....\n", | |
"\n", | |
"Total T: 1160763 Episode Num: 2020 Reward: 2092.946362 Avg Reward: 1901.671013saving best model....\n", | |
"\n", | |
"Total T: 1165320 Episode Num: 2025 Reward: 2005.365287 Avg Reward: 1918.611286saving best model....\n", | |
"\n", | |
"Total T: 1166320 Episode Num: 2026 Reward: 1909.152823 Avg Reward: 1926.859454saving best model....\n", | |
"\n", | |
"Total T: 1182478 Episode Num: 2043 Reward: 2097.861132 Avg Reward: 1929.429959saving best model....\n", | |
"\n", | |
"Total T: 1183478 Episode Num: 2044 Reward: 2255.397897 Avg Reward: 1933.135084saving best model....\n", | |
"\n", | |
"Total T: 1184478 Episode Num: 2045 Reward: 2287.414460 Avg Reward: 1938.861168saving best model....\n", | |
"\n", | |
"Total T: 1185478 Episode Num: 2046 Reward: 1817.681244 Avg Reward: 1943.034470saving best model....\n", | |
"\n", | |
"Total T: 1278653 Episode Num: 2147 Reward: 2095.636704 Avg Reward: 1940.963032saving best model....\n", | |
"\n", | |
"Total T: 1280653 Episode Num: 2149 Reward: 1993.272350 Avg Reward: 1952.655975saving best model....\n", | |
"\n", | |
"Total T: 1281653 Episode Num: 2150 Reward: 2211.595190 Avg Reward: 1955.909722saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 1282653 Episode Num: 2151 Reward: 2278.623270 Avg Reward: 1955.972979saving best model....\n", | |
"\n", | |
"Total T: 1283653 Episode Num: 2152 Reward: 2296.058155 Avg Reward: 1958.867957saving best model....\n", | |
"\n", | |
"Total T: 1285653 Episode Num: 2154 Reward: 2047.697984 Avg Reward: 1959.030327saving best model....\n", | |
"\n", | |
"Total T: 1286653 Episode Num: 2155 Reward: 2173.697953 Avg Reward: 1962.093588saving best model....\n", | |
"\n", | |
"Total T: 1287653 Episode Num: 2156 Reward: 2275.344447 Avg Reward: 1965.456756saving best model....\n", | |
"\n", | |
"Total T: 1288653 Episode Num: 2157 Reward: 2041.508042 Avg Reward: 1976.215511saving best model....\n", | |
"\n", | |
"Total T: 1289653 Episode Num: 2158 Reward: 2285.170873 Avg Reward: 1992.755884saving best model....\n", | |
"\n", | |
"Total T: 1290653 Episode Num: 2159 Reward: 2247.382220 Avg Reward: 1992.975531saving best model....\n", | |
"\n", | |
"Total T: 1302653 Episode Num: 2171 Reward: 2123.387622 Avg Reward: 2009.603248saving best model....\n", | |
"\n", | |
"Total T: 1303653 Episode Num: 2172 Reward: 2033.092230 Avg Reward: 2010.851613saving best model....\n", | |
"\n", | |
"Total T: 1306653 Episode Num: 2175 Reward: 2108.636220 Avg Reward: 2014.844603saving best model....\n", | |
"\n", | |
"Total T: 1307847 Episode Num: 2177 Reward: 359.314961 Avg Reward: 1997.6389674saving best model....\n", | |
"\n", | |
"Total T: 1308847 Episode Num: 2178 Reward: 2090.023908 Avg Reward: 2017.604592saving best model....\n", | |
"\n", | |
"Total T: 1309847 Episode Num: 2179 Reward: 2325.507923 Avg Reward: 2022.155436saving best model....\n", | |
"\n", | |
"Total T: 1322152 Episode Num: 2192 Reward: 2207.796535 Avg Reward: 2020.090943saving best model....\n", | |
"\n", | |
"Total T: 1324152 Episode Num: 2194 Reward: 2077.735638 Avg Reward: 2042.514348saving best model....\n", | |
"\n", | |
"Total T: 1326152 Episode Num: 2196 Reward: 2099.490457 Avg Reward: 2042.996111saving best model....\n", | |
"\n", | |
"Total T: 1327152 Episode Num: 2197 Reward: 2300.156792 Avg Reward: 2065.613890saving best model....\n", | |
"\n", | |
"Total T: 1329152 Episode Num: 2199 Reward: 2208.152247 Avg Reward: 2066.353948saving best model....\n", | |
"\n", | |
"Total T: 1330152 Episode Num: 2200 Reward: 2274.157704 Avg Reward: 2069.350461saving best model....\n", | |
"\n", | |
"Total T: 1331152 Episode Num: 2201 Reward: 2294.586438 Avg Reward: 2071.562157saving best model....\n", | |
"\n", | |
"Total T: 1334152 Episode Num: 2204 Reward: 2239.825934 Avg Reward: 2071.062463saving best model....\n", | |
"\n", | |
"Total T: 1335152 Episode Num: 2205 Reward: 2068.378640 Avg Reward: 2091.693624saving best model....\n", | |
"\n", | |
"Total T: 1336152 Episode Num: 2206 Reward: 2319.505288 Avg Reward: 2092.168280saving best model....\n", | |
"\n", | |
"Total T: 1338152 Episode Num: 2208 Reward: 2168.682514 Avg Reward: 2092.876083saving best model....\n", | |
"\n", | |
"Total T: 1339152 Episode Num: 2209 Reward: 2257.336493 Avg Reward: 2093.785281saving best model....\n", | |
"\n", | |
"Total T: 1340152 Episode Num: 2210 Reward: 2244.577420 Avg Reward: 2094.723440saving best model....\n", | |
"\n", | |
"Total T: 1350933 Episode Num: 2221 Reward: 2414.021788 Avg Reward: 2092.064168saving best model....\n", | |
"\n", | |
"Total T: 1397224 Episode Num: 2269 Reward: 2211.787399 Avg Reward: 2108.363821saving best model....\n", | |
"\n", | |
"Total T: 1398224 Episode Num: 2270 Reward: 2200.814384 Avg Reward: 2110.563665saving best model....\n", | |
"\n", | |
"Total T: 1399224 Episode Num: 2271 Reward: 2141.530119 Avg Reward: 2110.745090saving best model....\n", | |
"\n", | |
"Total T: 1400224 Episode Num: 2272 Reward: 2335.074237 Avg Reward: 2113.764910saving best model....\n", | |
"\n", | |
"Total T: 1401224 Episode Num: 2273 Reward: 2378.968279 Avg Reward: 2117.731247saving best model....\n", | |
"\n", | |
"Total T: 1402224 Episode Num: 2274 Reward: 2213.004554 Avg Reward: 2121.295618saving best model....\n", | |
"\n", | |
"Total T: 1403224 Episode Num: 2275 Reward: 2209.527284 Avg Reward: 2122.304529saving best model....\n", | |
"\n", | |
"Total T: 1404224 Episode Num: 2276 Reward: 2340.998001 Avg Reward: 2124.045122saving best model....\n", | |
"\n", | |
"Total T: 1405224 Episode Num: 2277 Reward: 2243.985206 Avg Reward: 2142.891825saving best model....\n", | |
"\n", | |
"Total T: 1408224 Episode Num: 2280 Reward: 2272.415727 Avg Reward: 2143.057264saving best model....\n", | |
"\n", | |
"Total T: 1411224 Episode Num: 2283 Reward: 2249.070127 Avg Reward: 2156.231997saving best model....\n", | |
"\n", | |
"Total T: 1412224 Episode Num: 2284 Reward: 2227.564290 Avg Reward: 2157.487644saving best model....\n", | |
"\n", | |
"Total T: 1413224 Episode Num: 2285 Reward: 2336.607954 Avg Reward: 2159.781280saving best model....\n", | |
"\n", | |
"Total T: 1415224 Episode Num: 2287 Reward: 2169.868690 Avg Reward: 2161.012190saving best model....\n", | |
"\n", | |
"Total T: 1418224 Episode Num: 2290 Reward: 2234.837062 Avg Reward: 2161.669349saving best model....\n", | |
"\n", | |
"Total T: 1421224 Episode Num: 2293 Reward: 2197.163960 Avg Reward: 2161.569966saving best model....\n", | |
"\n", | |
"Total T: 1422224 Episode Num: 2294 Reward: 2303.901425 Avg Reward: 2163.831624saving best model....\n", | |
"\n", | |
"Total T: 1423224 Episode Num: 2295 Reward: 2392.457169 Avg Reward: 2164.828817saving best model....\n", | |
"\n", | |
"Total T: 1425224 Episode Num: 2297 Reward: 2252.686888 Avg Reward: 2166.499965saving best model....\n", | |
"\n", | |
"Total T: 1426224 Episode Num: 2298 Reward: 2310.881405 Avg Reward: 2168.036706saving best model....\n", | |
"\n", | |
"Total T: 1430224 Episode Num: 2302 Reward: 2116.694561 Avg Reward: 2164.771133saving best model....\n", | |
"\n", | |
"Total T: 1440224 Episode Num: 2312 Reward: 2278.558496 Avg Reward: 2166.895669saving best model....\n", | |
"\n", | |
"Total T: 1443224 Episode Num: 2315 Reward: 2254.536775 Avg Reward: 2168.831017saving best model....\n", | |
"\n", | |
"Total T: 1444224 Episode Num: 2316 Reward: 2188.512419 Avg Reward: 2175.198731saving best model....\n", | |
"\n", | |
"Total T: 1475526 Episode Num: 2348 Reward: 2135.145530 Avg Reward: 2168.894627saving best model....\n", | |
"\n", | |
"Total T: 1476526 Episode Num: 2349 Reward: 2216.933613 Avg Reward: 2187.327830saving best model....\n", | |
"\n", | |
"Total T: 1478526 Episode Num: 2351 Reward: 2248.355618 Avg Reward: 2187.890770saving best model....\n", | |
"\n", | |
"Total T: 1544526 Episode Num: 2417 Reward: 2195.301625 Avg Reward: 2179.272170saving best model....\n", | |
"\n", | |
"Total T: 1587526 Episode Num: 2460 Reward: 2193.923542 Avg Reward: 2196.188334saving best model....\n", | |
"\n", | |
"Total T: 1588526 Episode Num: 2461 Reward: 2358.079283 Avg Reward: 2199.106330saving best model....\n", | |
"\n", | |
"Total T: 1589526 Episode Num: 2462 Reward: 2178.848581 Avg Reward: 2200.069872saving best model....\n", | |
"\n", | |
"Total T: 1590526 Episode Num: 2463 Reward: 2230.661274 Avg Reward: 2200.488631saving best model....\n", | |
"\n", | |
"Total T: 1591526 Episode Num: 2464 Reward: 2388.481468 Avg Reward: 2203.955786saving best model....\n", | |
"\n", | |
"Total T: 1592526 Episode Num: 2465 Reward: 2330.158211 Avg Reward: 2206.252784saving best model....\n", | |
"\n", | |
"Total T: 1593526 Episode Num: 2466 Reward: 2160.839371 Avg Reward: 2206.343569saving best model....\n", | |
"\n", | |
"Total T: 1595526 Episode Num: 2468 Reward: 2111.398864 Avg Reward: 2207.923683saving best model....\n", | |
"\n", | |
"Total T: 1597526 Episode Num: 2470 Reward: 2264.217299 Avg Reward: 2208.946414saving best model....\n", | |
"\n", | |
"Total T: 1621526 Episode Num: 2494 Reward: 2209.593422 Avg Reward: 2208.974320saving best model....\n", | |
"\n", | |
"Total T: 1622526 Episode Num: 2495 Reward: 2299.560293 Avg Reward: 2210.572843saving best model....\n", | |
"\n", | |
"Total T: 1623526 Episode Num: 2496 Reward: 2296.624656 Avg Reward: 2211.139742saving best model....\n", | |
"\n", | |
"Total T: 1625526 Episode Num: 2498 Reward: 2161.657298 Avg Reward: 2211.158486saving best model....\n", | |
"\n", | |
"Total T: 1626526 Episode Num: 2499 Reward: 2285.763795 Avg Reward: 2213.280079saving best model....\n", | |
"\n", | |
"Total T: 1627526 Episode Num: 2500 Reward: 2226.273782 Avg Reward: 2213.505020saving best model....\n", | |
"\n", | |
"Total T: 1630526 Episode Num: 2503 Reward: 2265.035197 Avg Reward: 2214.648240saving best model....\n", | |
"\n", | |
"Total T: 1653512 Episode Num: 2526 Reward: 2290.807955 Avg Reward: 2213.546957saving best model....\n", | |
"\n", | |
"Total T: 1656512 Episode Num: 2529 Reward: 2309.028109 Avg Reward: 2214.949771saving best model....\n", | |
"\n", | |
"Total T: 1657512 Episode Num: 2530 Reward: 2148.466407 Avg Reward: 2215.514334saving best model....\n", | |
"\n", | |
"Total T: 1663512 Episode Num: 2536 Reward: 2355.611254 Avg Reward: 2214.986010saving best model....\n", | |
"\n", | |
"Total T: 1668512 Episode Num: 2541 Reward: 2204.236205 Avg Reward: 2219.975941saving best model....\n", | |
"\n", | |
"Total T: 1669512 Episode Num: 2542 Reward: 2392.129118 Avg Reward: 2221.728860saving best model....\n", | |
"\n", | |
"Total T: 1672512 Episode Num: 2545 Reward: 2367.130317 Avg Reward: 2222.072612saving best model....\n", | |
"\n", | |
"Total T: 1673512 Episode Num: 2546 Reward: 2350.432181 Avg Reward: 2223.383427saving best model....\n", | |
"\n", | |
"Total T: 1676512 Episode Num: 2549 Reward: 2175.408641 Avg Reward: 2223.509407saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 1677512 Episode Num: 2550 Reward: 2414.115895 Avg Reward: 2226.005899saving best model....\n", | |
"\n", | |
"Total T: 1678512 Episode Num: 2551 Reward: 2270.894867 Avg Reward: 2226.330132saving best model....\n", | |
"\n", | |
"Total T: 1783544 Episode Num: 2657 Reward: 2153.318392 Avg Reward: 2223.700933saving best model....\n", | |
"\n", | |
"Total T: 1784544 Episode Num: 2658 Reward: 2277.066450 Avg Reward: 2226.860937saving best model....\n", | |
"\n", | |
"Total T: 1785544 Episode Num: 2659 Reward: 2122.291268 Avg Reward: 2227.330161saving best model....\n", | |
"\n", | |
"Total T: 1788544 Episode Num: 2662 Reward: 2321.189522 Avg Reward: 2228.843355saving best model....\n", | |
"\n", | |
"Total T: 1800544 Episode Num: 2674 Reward: 2328.916106 Avg Reward: 2230.222128saving best model....\n", | |
"\n", | |
"Total T: 1801544 Episode Num: 2675 Reward: 2402.046611 Avg Reward: 2232.663871saving best model....\n", | |
"\n", | |
"Total T: 1819544 Episode Num: 2693 Reward: 2171.016039 Avg Reward: 2231.720215saving best model....\n", | |
"\n", | |
"Total T: 1821544 Episode Num: 2695 Reward: 2184.071621 Avg Reward: 2234.673290saving best model....\n", | |
"\n", | |
"Total T: 1827544 Episode Num: 2701 Reward: 2302.145385 Avg Reward: 2237.676467saving best model....\n", | |
"\n", | |
"Total T: 1829544 Episode Num: 2703 Reward: 2294.172838 Avg Reward: 2260.373372saving best model....\n", | |
"\n", | |
"Total T: 1830544 Episode Num: 2704 Reward: 2348.437319 Avg Reward: 2261.358574saving best model....\n", | |
"\n", | |
"Total T: 2025716 Episode Num: 2901 Reward: 2369.566094 Avg Reward: 2261.377187saving best model....\n", | |
"\n", | |
"Total T: 2026716 Episode Num: 2902 Reward: 2363.776229 Avg Reward: 2262.683288saving best model....\n", | |
"\n", | |
"Total T: 2027716 Episode Num: 2903 Reward: 2364.403532 Avg Reward: 2265.713936saving best model....\n", | |
"\n", | |
"Total T: 2028716 Episode Num: 2904 Reward: 2348.782221 Avg Reward: 2267.035678saving best model....\n", | |
"\n", | |
"Total T: 2029716 Episode Num: 2905 Reward: 2213.790423 Avg Reward: 2267.046273saving best model....\n", | |
"\n", | |
"Total T: 2030716 Episode Num: 2906 Reward: 2324.238609 Avg Reward: 2270.910208saving best model....\n", | |
"\n", | |
"Total T: 2031716 Episode Num: 2907 Reward: 2362.605007 Avg Reward: 2271.792372saving best model....\n", | |
"\n", | |
"Total T: 2032716 Episode Num: 2908 Reward: 2193.545262 Avg Reward: 2271.973718saving best model....\n", | |
"\n", | |
"Total T: 2033716 Episode Num: 2909 Reward: 2323.651273 Avg Reward: 2273.225299saving best model....\n", | |
"\n", | |
"Total T: 2039716 Episode Num: 2915 Reward: 2261.672338 Avg Reward: 2274.125151saving best model....\n", | |
"\n", | |
"Total T: 2041716 Episode Num: 2917 Reward: 2292.134488 Avg Reward: 2275.297822saving best model....\n", | |
"\n", | |
"Total T: 2042716 Episode Num: 2918 Reward: 2436.106488 Avg Reward: 2277.587426saving best model....\n", | |
"\n", | |
"Total T: 2047716 Episode Num: 2923 Reward: 2153.633101 Avg Reward: 2276.644183saving best model....\n", | |
"\n", | |
"Total T: 2048716 Episode Num: 2924 Reward: 2309.164043 Avg Reward: 2279.764625saving best model....\n", | |
"\n", | |
"Total T: 2055716 Episode Num: 2931 Reward: 2213.810087 Avg Reward: 2280.792951saving best model....\n", | |
"\n", | |
"Total T: 2056716 Episode Num: 2932 Reward: 2382.842279 Avg Reward: 2281.437055saving best model....\n", | |
"\n", | |
"Total T: 2057716 Episode Num: 2933 Reward: 2402.747680 Avg Reward: 2282.742137saving best model....\n", | |
"\n", | |
"Total T: 2058716 Episode Num: 2934 Reward: 2358.533544 Avg Reward: 2282.782894saving best model....\n", | |
"\n", | |
"Total T: 2082716 Episode Num: 2958 Reward: 2387.610250 Avg Reward: 2278.462805saving best model....\n", | |
"\n", | |
"Total T: 2083716 Episode Num: 2959 Reward: 2292.447475 Avg Reward: 2292.888698saving best model....\n", | |
"\n", | |
"Total T: 2084716 Episode Num: 2960 Reward: 2360.503764 Avg Reward: 2296.212227saving best model....\n", | |
"\n", | |
"Total T: 2095716 Episode Num: 2971 Reward: 2237.132254 Avg Reward: 2296.529476saving best model....\n", | |
"\n", | |
"Total T: 2096716 Episode Num: 2972 Reward: 2377.278959 Avg Reward: 2298.955639saving best model....\n", | |
"\n", | |
"Total T: 2098716 Episode Num: 2974 Reward: 2403.046292 Avg Reward: 2300.043672saving best model....\n", | |
"\n", | |
"Total T: 2099716 Episode Num: 2975 Reward: 2318.957888 Avg Reward: 2300.170343saving best model....\n", | |
"\n", | |
"Total T: 2100716 Episode Num: 2976 Reward: 2357.341800 Avg Reward: 2301.205319saving best model....\n", | |
"\n", | |
"Total T: 2101716 Episode Num: 2977 Reward: 2372.902845 Avg Reward: 2301.532729saving best model....\n", | |
"\n", | |
"Total T: 2104716 Episode Num: 2980 Reward: 2278.577205 Avg Reward: 2304.297573saving best model....\n", | |
"\n", | |
"Total T: 2105716 Episode Num: 2981 Reward: 2184.000451 Avg Reward: 2304.493286saving best model....\n", | |
"\n", | |
"Total T: 2147716 Episode Num: 3023 Reward: 2376.687748 Avg Reward: 2305.032192saving best model....\n", | |
"\n", | |
"Total T: 2148716 Episode Num: 3024 Reward: 2470.236271 Avg Reward: 2306.642914saving best model....\n", | |
"\n", | |
"Total T: 2149716 Episode Num: 3025 Reward: 2359.257020 Avg Reward: 2306.993727saving best model....\n", | |
"\n", | |
"Total T: 2150716 Episode Num: 3026 Reward: 2428.683904 Avg Reward: 2308.047295saving best model....\n", | |
"\n", | |
"Total T: 2154716 Episode Num: 3030 Reward: 2267.005528 Avg Reward: 2308.245183saving best model....\n", | |
"\n", | |
"Total T: 2187639 Episode Num: 3063 Reward: 2348.746874 Avg Reward: 2308.798742saving best model....\n", | |
"\n", | |
"Total T: 2188639 Episode Num: 3064 Reward: 2337.511754 Avg Reward: 2309.338858saving best model....\n", | |
"\n", | |
"Total T: 2189639 Episode Num: 3065 Reward: 2426.054401 Avg Reward: 2310.703234saving best model....\n", | |
"\n", | |
"Total T: 2190639 Episode Num: 3066 Reward: 2367.000867 Avg Reward: 2310.856375saving best model....\n", | |
"\n", | |
"Total T: 2191639 Episode Num: 3067 Reward: 2350.909388 Avg Reward: 2312.384510saving best model....\n", | |
"\n", | |
"Total T: 2192639 Episode Num: 3068 Reward: 2287.857333 Avg Reward: 2314.160484saving best model....\n", | |
"\n", | |
"Total T: 2194639 Episode Num: 3070 Reward: 2321.311670 Avg Reward: 2313.860515saving best model....\n", | |
"\n", | |
"Total T: 2210639 Episode Num: 3086 Reward: 2279.576818 Avg Reward: 2314.663742saving best model....\n", | |
"\n", | |
"Total T: 2211639 Episode Num: 3087 Reward: 2341.102102 Avg Reward: 2315.220682saving best model....\n", | |
"\n", | |
"Total T: 2216639 Episode Num: 3092 Reward: 2358.046487 Avg Reward: 2317.725334saving best model....\n", | |
"\n", | |
"Total T: 2217639 Episode Num: 3093 Reward: 2446.511075 Avg Reward: 2321.803850saving best model....\n", | |
"\n", | |
"Total T: 2218639 Episode Num: 3094 Reward: 2446.729687 Avg Reward: 2322.997402saving best model....\n", | |
"\n", | |
"Total T: 2476436 Episode Num: 3355 Reward: 2410.728740 Avg Reward: 2311.515216saving best model....\n", | |
"\n", | |
"Total T: 2477436 Episode Num: 3356 Reward: 2406.955269 Avg Reward: 2332.188476saving best model....\n", | |
"\n", | |
"Total T: 2478436 Episode Num: 3357 Reward: 2298.238307 Avg Reward: 2332.584360saving best model....\n", | |
"\n", | |
"Total T: 2480436 Episode Num: 3359 Reward: 2275.610972 Avg Reward: 2333.441170saving best model....\n", | |
"\n", | |
"Total T: 2481436 Episode Num: 3360 Reward: 2467.116348 Avg Reward: 2336.598461saving best model....\n", | |
"\n", | |
"Total T: 2482436 Episode Num: 3361 Reward: 2305.970150 Avg Reward: 2336.633027saving best model....\n", | |
"\n", | |
"Total T: 2484436 Episode Num: 3363 Reward: 2419.310456 Avg Reward: 2337.525082saving best model....\n", | |
"\n", | |
"Total T: 2485436 Episode Num: 3364 Reward: 2449.301079 Avg Reward: 2338.158351saving best model....\n", | |
"\n", | |
"Total T: 2486436 Episode Num: 3365 Reward: 2358.240732 Avg Reward: 2338.705752saving best model....\n", | |
"\n", | |
"Total T: 2488436 Episode Num: 3367 Reward: 2288.187367 Avg Reward: 2338.836166saving best model....\n", | |
"\n", | |
"Total T: 2489436 Episode Num: 3368 Reward: 2270.053563 Avg Reward: 2339.710343saving best model....\n", | |
"\n", | |
"Total T: 2490436 Episode Num: 3369 Reward: 2449.349531 Avg Reward: 2340.713924saving best model....\n", | |
"\n", | |
"Total T: 2492436 Episode Num: 3371 Reward: 2265.966478 Avg Reward: 2341.214860saving best model....\n", | |
"\n", | |
"Total T: 2493436 Episode Num: 3372 Reward: 2402.822523 Avg Reward: 2343.941677saving best model....\n", | |
"\n", | |
"Total T: 2494436 Episode Num: 3373 Reward: 2352.244790 Avg Reward: 2343.958599saving best model....\n", | |
"\n", | |
"Total T: 2495436 Episode Num: 3374 Reward: 2323.948352 Avg Reward: 2344.010485saving best model....\n", | |
"\n", | |
"Total T: 2496436 Episode Num: 3375 Reward: 2336.004591 Avg Reward: 2347.144307saving best model....\n", | |
"\n", | |
"Total T: 2674225 Episode Num: 3557 Reward: 2465.881392 Avg Reward: 2347.406816saving best model....\n", | |
"\n", | |
"Total T: 2675225 Episode Num: 3558 Reward: 2502.126712 Avg Reward: 2349.995398saving best model....\n", | |
"\n", | |
"Total T: 2676225 Episode Num: 3559 Reward: 2414.227466 Avg Reward: 2350.832502saving best model....\n", | |
"\n", | |
"Total T: 2687225 Episode Num: 3570 Reward: 2354.839472 Avg Reward: 2349.973442saving best model....\n", | |
"\n", | |
"Total T: 2696225 Episode Num: 3579 Reward: 2414.913937 Avg Reward: 2351.065531saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 2697225 Episode Num: 3580 Reward: 2511.032046 Avg Reward: 2352.228172saving best model....\n", | |
"\n", | |
"Total T: 2809146 Episode Num: 3693 Reward: 2350.420103 Avg Reward: 2352.268838saving best model....\n", | |
"\n", | |
"Total T: 2810146 Episode Num: 3694 Reward: 2332.138665 Avg Reward: 2353.183528saving best model....\n", | |
"\n", | |
"Total T: 2811146 Episode Num: 3695 Reward: 2569.125164 Avg Reward: 2378.745147saving best model....\n", | |
"\n", | |
"Total T: 2812146 Episode Num: 3696 Reward: 2508.268060 Avg Reward: 2379.971951saving best model....\n", | |
"\n", | |
"Total T: 2813146 Episode Num: 3697 Reward: 2337.819026 Avg Reward: 2380.401157saving best model....\n", | |
"\n", | |
"Total T: 2814146 Episode Num: 3698 Reward: 2342.367980 Avg Reward: 2383.982854saving best model....\n", | |
"\n", | |
"Total T: 2815146 Episode Num: 3699 Reward: 2383.442357 Avg Reward: 2384.515742saving best model....\n", | |
"\n", | |
"Total T: 2816146 Episode Num: 3700 Reward: 2445.194353 Avg Reward: 2386.066220saving best model....\n", | |
"\n", | |
"Total T: 2817146 Episode Num: 3701 Reward: 2515.765400 Avg Reward: 2386.134727saving best model....\n", | |
"\n", | |
"Total T: 2830001 Episode Num: 3714 Reward: 2377.537848 Avg Reward: 2385.589093saving best model....\n", | |
"\n", | |
"Total T: 2831001 Episode Num: 3715 Reward: 2417.871830 Avg Reward: 2386.842204saving best model....\n", | |
"\n", | |
"Total T: 2833001 Episode Num: 3717 Reward: 2371.062311 Avg Reward: 2386.159154saving best model....\n", | |
"\n", | |
"Total T: 2836001 Episode Num: 3720 Reward: 2374.172877 Avg Reward: 2386.188476saving best model....\n", | |
"\n", | |
"Total T: 2844001 Episode Num: 3728 Reward: 2444.492765 Avg Reward: 2389.378069saving best model....\n", | |
"\n", | |
"Total T: 2845001 Episode Num: 3729 Reward: 2492.229906 Avg Reward: 2390.442268saving best model....\n", | |
"\n", | |
"Total T: 2846001 Episode Num: 3730 Reward: 2477.708993 Avg Reward: 2391.766433saving best model....\n", | |
"\n", | |
"Total T: 2850001 Episode Num: 3734 Reward: 2405.913545 Avg Reward: 2390.968404saving best model....\n", | |
"\n", | |
"Total T: 2860001 Episode Num: 3744 Reward: 2410.475301 Avg Reward: 2392.011909saving best model....\n", | |
"\n", | |
"Total T: 2862001 Episode Num: 3746 Reward: 2377.329223 Avg Reward: 2392.187182saving best model....\n", | |
"\n", | |
"Total T: 2864001 Episode Num: 3748 Reward: 2314.104632 Avg Reward: 2392.661296saving best model....\n", | |
"\n", | |
"Total T: 3076353 Episode Num: 3962 Reward: 2323.262045 Avg Reward: 2390.699533saving best model....\n", | |
"\n", | |
"Total T: 3094353 Episode Num: 3980 Reward: 2488.968907 Avg Reward: 2394.299379saving best model....\n", | |
"\n", | |
"Total T: 3312909 Episode Num: 4202 Reward: 2527.219390 Avg Reward: 2409.750232saving best model....\n", | |
"\n", | |
"Total T: 3313909 Episode Num: 4203 Reward: 2592.028576 Avg Reward: 2411.850027saving best model....\n", | |
"\n", | |
"Total T: 3314909 Episode Num: 4204 Reward: 2605.530976 Avg Reward: 2414.106672saving best model....\n", | |
"\n", | |
"Total T: 3315909 Episode Num: 4205 Reward: 2450.283907 Avg Reward: 2415.225172saving best model....\n", | |
"\n", | |
"Total T: 3316909 Episode Num: 4206 Reward: 2525.773282 Avg Reward: 2417.527727saving best model....\n", | |
"\n", | |
"Total T: 3321909 Episode Num: 4211 Reward: 2467.181438 Avg Reward: 2417.629422saving best model....\n", | |
"\n", | |
"Total T: 3328909 Episode Num: 4218 Reward: 2519.647356 Avg Reward: 2417.049697saving best model....\n", | |
"\n", | |
"Total T: 3330909 Episode Num: 4220 Reward: 2410.076400 Avg Reward: 2418.464940saving best model....\n", | |
"\n", | |
"Total T: 3331909 Episode Num: 4221 Reward: 2469.473985 Avg Reward: 2419.348305saving best model....\n", | |
"\n", | |
"Total T: 3332909 Episode Num: 4222 Reward: 2377.365073 Avg Reward: 2419.429706saving best model....\n", | |
"\n", | |
"Total T: 3339909 Episode Num: 4229 Reward: 2420.187748 Avg Reward: 2418.927405saving best model....\n", | |
"\n", | |
"Total T: 3340909 Episode Num: 4230 Reward: 2524.823406 Avg Reward: 2420.541890saving best model....\n", | |
"\n", | |
"Total T: 3341909 Episode Num: 4231 Reward: 2476.022348 Avg Reward: 2436.932421saving best model....\n", | |
"\n", | |
"Total T: 3342909 Episode Num: 4232 Reward: 2463.654157 Avg Reward: 2437.393380saving best model....\n", | |
"\n", | |
"Total T: 3343909 Episode Num: 4233 Reward: 2502.636857 Avg Reward: 2438.092138saving best model....\n", | |
"\n", | |
"Total T: 3344909 Episode Num: 4234 Reward: 2454.096131 Avg Reward: 2439.323447saving best model....\n", | |
"\n", | |
"Total T: 3345909 Episode Num: 4235 Reward: 2424.717197 Avg Reward: 2439.533469saving best model....\n", | |
"\n", | |
"Total T: 3347909 Episode Num: 4237 Reward: 2373.956316 Avg Reward: 2440.099359saving best model....\n", | |
"\n", | |
"Total T: 3348909 Episode Num: 4238 Reward: 2481.649648 Avg Reward: 2441.930308saving best model....\n", | |
"\n", | |
"Total T: 3349909 Episode Num: 4239 Reward: 2584.348264 Avg Reward: 2445.140738saving best model....\n", | |
"\n", | |
"Total T: 3354909 Episode Num: 4244 Reward: 2660.881966 Avg Reward: 2446.301478saving best model....\n", | |
"\n", | |
"Total T: 3360909 Episode Num: 4250 Reward: 2454.512823 Avg Reward: 2445.720651saving best model....\n", | |
"\n", | |
"Total T: 3362909 Episode Num: 4252 Reward: 2411.838660 Avg Reward: 2448.713267saving best model....\n", | |
"\n", | |
"Total T: 3363909 Episode Num: 4253 Reward: 2452.242794 Avg Reward: 2448.866561saving best model....\n", | |
"\n", | |
"Total T: 3368909 Episode Num: 4258 Reward: 2460.688496 Avg Reward: 2450.267813saving best model....\n", | |
"\n", | |
"Total T: 3369909 Episode Num: 4259 Reward: 2470.514952 Avg Reward: 2450.655349saving best model....\n", | |
"\n", | |
"Total T: 3371909 Episode Num: 4261 Reward: 2386.097083 Avg Reward: 2450.657544saving best model....\n", | |
"\n", | |
"Total T: 3373909 Episode Num: 4263 Reward: 2267.674041 Avg Reward: 2451.951235saving best model....\n", | |
"\n", | |
"Total T: 3374909 Episode Num: 4264 Reward: 2515.055709 Avg Reward: 2452.628128saving best model....\n", | |
"\n", | |
"Total T: 3375909 Episode Num: 4265 Reward: 2495.144286 Avg Reward: 2454.344502saving best model....\n", | |
"\n", | |
"Total T: 3381909 Episode Num: 4271 Reward: 2421.444522 Avg Reward: 2453.799039saving best model....\n", | |
"\n", | |
"Total T: 3382909 Episode Num: 4272 Reward: 2544.297140 Avg Reward: 2455.171976saving best model....\n", | |
"\n", | |
"Total T: 3383909 Episode Num: 4273 Reward: 2478.669311 Avg Reward: 2455.343207saving best model....\n", | |
"\n", | |
"Total T: 3384909 Episode Num: 4274 Reward: 2490.346491 Avg Reward: 2455.716744saving best model....\n", | |
"\n", | |
"Total T: 3385909 Episode Num: 4275 Reward: 2497.383338 Avg Reward: 2456.180096saving best model....\n", | |
"\n", | |
"Total T: 3387909 Episode Num: 4277 Reward: 2483.654232 Avg Reward: 2455.857844saving best model....\n", | |
"\n", | |
"Total T: 3390909 Episode Num: 4280 Reward: 2477.536472 Avg Reward: 2456.111898saving best model....\n", | |
"\n", | |
"Total T: 3391909 Episode Num: 4281 Reward: 2546.949060 Avg Reward: 2457.675940saving best model....\n", | |
"\n", | |
"Total T: 3392909 Episode Num: 4282 Reward: 2472.740467 Avg Reward: 2458.071198saving best model....\n", | |
"\n", | |
"Total T: 3393909 Episode Num: 4283 Reward: 2516.822066 Avg Reward: 2459.050470saving best model....\n", | |
"\n", | |
"Total T: 3394909 Episode Num: 4284 Reward: 2443.323380 Avg Reward: 2468.763484saving best model....\n", | |
"\n", | |
"Total T: 3395909 Episode Num: 4285 Reward: 2499.832183 Avg Reward: 2469.092760saving best model....\n", | |
"\n", | |
"Total T: 3399909 Episode Num: 4289 Reward: 2538.692761 Avg Reward: 2469.399040saving best model....\n", | |
"\n", | |
"Total T: 3556781 Episode Num: 4450 Reward: 2621.512585 Avg Reward: 2467.690498saving best model....\n", | |
"\n", | |
"Total T: 3557781 Episode Num: 4451 Reward: 2510.322269 Avg Reward: 2480.559453saving best model....\n", | |
"\n", | |
"Total T: 3558781 Episode Num: 4452 Reward: 2489.918402 Avg Reward: 2481.447998saving best model....\n", | |
"\n", | |
"Total T: 3559781 Episode Num: 4453 Reward: 2477.918112 Avg Reward: 2481.966846saving best model....\n", | |
"\n", | |
"Total T: 3563781 Episode Num: 4457 Reward: 2490.343949 Avg Reward: 2482.528808saving best model....\n", | |
"\n", | |
"Total T: 3565781 Episode Num: 4459 Reward: 2478.245739 Avg Reward: 2484.580858saving best model....\n", | |
"\n", | |
"Total T: 3566781 Episode Num: 4460 Reward: 2536.853393 Avg Reward: 2487.341995saving best model....\n", | |
"\n", | |
"Total T: 3609781 Episode Num: 4503 Reward: 2544.833473 Avg Reward: 2487.081777saving best model....\n", | |
"\n", | |
"Total T: 3610781 Episode Num: 4504 Reward: 2571.084328 Avg Reward: 2487.524957saving best model....\n", | |
"\n", | |
"Total T: 3615781 Episode Num: 4509 Reward: 2507.152480 Avg Reward: 2487.021592saving best model....\n", | |
"\n", | |
"Total T: 3616781 Episode Num: 4510 Reward: 2615.299517 Avg Reward: 2488.992194saving best model....\n", | |
"\n", | |
"Total T: 3617781 Episode Num: 4511 Reward: 2526.698008 Avg Reward: 2489.636547saving best model....\n", | |
"\n", | |
"Total T: 3618781 Episode Num: 4512 Reward: 2420.578944 Avg Reward: 2489.725465saving best model....\n", | |
"\n", | |
"Total T: 3803299 Episode Num: 4699 Reward: 2460.935178 Avg Reward: 2488.170857saving best model....\n", | |
"\n", | |
"Total T: 3804299 Episode Num: 4700 Reward: 2616.896801 Avg Reward: 2501.919947saving best model....\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total T: 3805299 Episode Num: 4701 Reward: 2544.888870 Avg Reward: 2503.012489saving best model....\n", | |
"\n", | |
"Total T: 3806299 Episode Num: 4702 Reward: 2533.532089 Avg Reward: 2503.795806saving best model....\n", | |
"\n", | |
"Total T: 3807299 Episode Num: 4703 Reward: 2515.213482 Avg Reward: 2504.706304saving best model....\n", | |
"\n", | |
"Total T: 3808299 Episode Num: 4704 Reward: 2579.330991 Avg Reward: 2513.139760saving best model....\n", | |
"\n", | |
"Total T: 3811299 Episode Num: 4707 Reward: 2521.731264 Avg Reward: 2513.070881saving best model....\n", | |
"\n", | |
"Total T: 3812299 Episode Num: 4708 Reward: 2515.768538 Avg Reward: 2513.271868saving best model....\n", | |
"\n", | |
"Total T: 3815299 Episode Num: 4711 Reward: 2473.403224 Avg Reward: 2513.075645saving best model....\n", | |
"\n", | |
"Total T: 3819299 Episode Num: 4715 Reward: 2517.097845 Avg Reward: 2515.097030saving best model....\n", | |
"\n", | |
"Total T: 3820299 Episode Num: 4716 Reward: 2605.361263 Avg Reward: 2515.898618saving best model....\n", | |
"\n", | |
"Total T: 3821299 Episode Num: 4717 Reward: 2652.036548 Avg Reward: 2517.519978saving best model....\n", | |
"\n", | |
"Total T: 3822299 Episode Num: 4718 Reward: 2495.833785 Avg Reward: 2518.069069saving best model....\n", | |
"\n", | |
"Total T: 3832299 Episode Num: 4728 Reward: 2540.134428 Avg Reward: 2518.583238saving best model....\n", | |
"\n", | |
"Total T: 3833299 Episode Num: 4729 Reward: 2553.186390 Avg Reward: 2519.228081saving best model....\n", | |
"\n", | |
"Total T: 3837299 Episode Num: 4733 Reward: 2476.722248 Avg Reward: 2520.302369saving best model....\n", | |
"\n", | |
"Total T: 3838299 Episode Num: 4734 Reward: 2607.329411 Avg Reward: 2521.875432saving best model....\n", | |
"\n", | |
"Total T: 3844299 Episode Num: 4740 Reward: 2477.433662 Avg Reward: 2521.491254saving best model....\n", | |
"\n", | |
"Total T: 3845299 Episode Num: 4741 Reward: 2580.186531 Avg Reward: 2522.550975saving best model....\n", | |
"\n", | |
"Total T: 3945544 Episode Num: 4842 Reward: 2528.581087 Avg Reward: 2503.001519saving best model....\n", | |
"\n", | |
"Total T: 3947544 Episode Num: 4844 Reward: 2431.939322 Avg Reward: 2523.192406saving best model....\n", | |
"\n", | |
"Total T: 3948544 Episode Num: 4845 Reward: 2716.584696 Avg Reward: 2526.685884saving best model....\n", | |
"\n", | |
"Total T: 3949544 Episode Num: 4846 Reward: 2636.490789 Avg Reward: 2528.312800saving best model....\n", | |
"\n", | |
"Total T: 3950544 Episode Num: 4847 Reward: 2627.301597 Avg Reward: 2529.201942saving best model....\n", | |
"\n", | |
"Total T: 3952544 Episode Num: 4849 Reward: 2491.674385 Avg Reward: 2529.649269saving best model....\n", | |
"\n", | |
"Total T: 3953544 Episode Num: 4850 Reward: 2589.780944 Avg Reward: 2530.516178saving best model....\n", | |
"\n", | |
"Total T: 3964544 Episode Num: 4861 Reward: 2484.815300 Avg Reward: 2529.510955saving best model....\n", | |
"\n", | |
"Total T: 3965544 Episode Num: 4862 Reward: 2634.469138 Avg Reward: 2531.959503saving best model....\n", | |
"\n", | |
"Total T: 3968544 Episode Num: 4865 Reward: 2487.920686 Avg Reward: 2531.520343saving best model....\n", | |
"\n", | |
"Total T: 3970544 Episode Num: 4867 Reward: 2525.026694 Avg Reward: 2531.394949saving best model....\n", | |
"\n", | |
"Total T: 3971544 Episode Num: 4868 Reward: 2702.666353 Avg Reward: 2534.430254saving best model....\n", | |
"\n", | |
"Total T: 3972544 Episode Num: 4869 Reward: 2593.952477 Avg Reward: 2535.193934saving best model....\n", | |
"\n", | |
"Total T: 3973544 Episode Num: 4870 Reward: 2535.141773 Avg Reward: 2535.655316saving best model....\n", | |
"\n", | |
"Total T: 3974544 Episode Num: 4871 Reward: 2569.018526 Avg Reward: 2536.476554saving best model....\n", | |
"\n", | |
"Total T: 4040544 Episode Num: 4937 Reward: 2506.281276 Avg Reward: 2536.142316saving best model....\n", | |
"\n", | |
"Total T: 4143340 Episode Num: 5041 Reward: 2570.353100 Avg Reward: 2536.643663saving best model....\n", | |
"\n", | |
"Total T: 4147340 Episode Num: 5045 Reward: 2619.707146 Avg Reward: 2538.319509saving best model....\n", | |
"\n", | |
"Total T: 4149340 Episode Num: 5047 Reward: 2489.976830 Avg Reward: 2538.440159saving best model....\n", | |
"\n", | |
"Total T: 4150340 Episode Num: 5048 Reward: 2596.150038 Avg Reward: 2539.269417saving best model....\n", | |
"\n", | |
"Total T: 4151340 Episode Num: 5049 Reward: 2508.139598 Avg Reward: 2539.481933saving best model....\n", | |
"\n", | |
"Total T: 4152340 Episode Num: 5050 Reward: 2656.446812 Avg Reward: 2541.108077saving best model....\n", | |
"\n", | |
"Total T: 4153340 Episode Num: 5051 Reward: 2653.111281 Avg Reward: 2544.066128saving best model....\n", | |
"\n", | |
"Total T: 4154340 Episode Num: 5052 Reward: 2619.972178 Avg Reward: 2544.687510saving best model....\n", | |
"\n", | |
"Total T: 4155340 Episode Num: 5053 Reward: 2633.571978 Avg Reward: 2544.925633saving best model....\n", | |
"\n", | |
"Total T: 4156340 Episode Num: 5054 Reward: 2591.273245 Avg Reward: 2545.098056saving best model....\n", | |
"\n", | |
"Total T: 4157340 Episode Num: 5055 Reward: 2511.098302 Avg Reward: 2545.606603saving best model....\n", | |
"\n", | |
"Total T: 4158340 Episode Num: 5056 Reward: 2563.140150 Avg Reward: 2546.569701saving best model....\n", | |
"\n", | |
"Total T: 4174340 Episode Num: 5072 Reward: 2419.913965 Avg Reward: 2545.040836saving best model....\n", | |
"\n", | |
"Total T: 4999141 Episode Num: 6480 Reward: 343.330499 Avg Reward: 2446.4885350" | |
] | |
} | |
], | |
"source": [ | |
"# Train agent\n", | |
"train(policy, env)" | |
] | |
}, | |
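{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Visualising The Training Log (Optional)\n",
"\n",
"The console log above is hard to read at a glance. As a rough sketch (not part of the original training code), the next cell parses lines in the `Total T: ... Episode Num: ... Reward: ... Avg Reward: ...` format from a text file and plots the resulting curves. The `training_log.txt` path is a placeholder for wherever this output was saved."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def plot_reward_curve(log_path='training_log.txt'):\n",
"    # Matches the 'Total T / Episode Num / Reward / Avg Reward' lines printed during training\n",
"    pattern = re.compile(r'Total T: (\\d+) Episode Num: (\\d+) Reward: ([\\d.]+) Avg Reward: ([\\d.]+)')\n",
"    steps, rewards, avg_rewards = [], [], []\n",
"    with open(log_path) as f:\n",
"        for line in f:\n",
"            match = pattern.search(line)\n",
"            if match:\n",
"                steps.append(int(match.group(1)))\n",
"                rewards.append(float(match.group(3)))\n",
"                avg_rewards.append(float(match.group(4)))\n",
"    plt.plot(steps, rewards, alpha=0.3, label='episode reward')\n",
"    plt.plot(steps, avg_rewards, label='average reward')\n",
"    plt.xlabel('environment steps')\n",
"    plt.ylabel('reward')\n",
"    plt.legend()\n",
"    plt.show()\n",
"\n",
"# plot_reward_curve()  # uncomment once a log file is available"
]
},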
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"policy.load()\n", | |
"\n", | |
"for i in range(100):\n", | |
" evaluate_policy(policy, env, render=True)" | |
] | |
}, | |
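{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rendering to screen needs a display attached. As an optional, untested sketch, the cell below instead wraps the environment in classic gym's `Monitor` wrapper so that evaluation roll-outs are written to disk as videos. The `./eval_videos` directory and the episode count are arbitrary choices here, and this assumes `evaluate_policy` accepts the wrapped environment plus a `render` flag, and that the environment exposes `rgb_array` rendering."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from gym import wrappers\n",
"\n",
"# Record every evaluation episode to ./eval_videos (classic gym Monitor API)\n",
"record_env = wrappers.Monitor(env, './eval_videos', video_callable=lambda ep: True, force=True)\n",
"\n",
"for _ in range(5):\n",
"    evaluate_policy(policy, record_env, render=False)\n",
"\n",
"record_env.close()"
]
},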
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"env.close()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |