Skip to content

Instantly share code, notes, and snippets.

@Hsgngr
Created July 14, 2021 15:43
Show Gist options
  • Save Hsgngr/d3d486a504b73b4ef6059d24450fa379 to your computer and use it in GitHub Desktop.
Save Hsgngr/d3d486a504b73b4ef6059d24450fa379 to your computer and use it in GitHub Desktop.
Tf-Agents Trading Notebook Fails
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
},
"orig_nbformat": 4,
"kernelspec": {
"name": "python3",
"display_name": "Python 3.9.5 64-bit ('gym_2': conda)"
},
"interpreter": {
"hash": "7d4c9b5d0f5bb8fb388c52216091e45585c947e4129bde3b9419a8a1ecc52274"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"# TF Agents Trade Environment"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#Import Libraries\n",
"from pathlib import Path\n",
"from freqtrade.configuration import Configuration\n",
"from tf_agents.environments import utils\n",
"from StockMarket import StockMarketEnvironment\n",
"\n",
"#General Libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"#TF-Agents Related\n",
"from tf_agents.environments import py_environment\n",
"from tf_agents.environments import tf_environment\n",
"from tf_agents.environments import tf_py_environment"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{2881}\n"
]
}
],
"source": [
"config = Configuration.from_files(['config.json'])\n",
"environment = StockMarketEnvironment(config)"
]
},
{
"source": [
"## Validate Environment"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"utils.validate_py_environment(environment, episodes=5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"action_spec: BoundedArraySpec(shape=(), dtype=dtype('int32'), name='action', minimum=0, maximum=2)\ntime_step_spec.observation: BoundedArraySpec(shape=(24,), dtype=dtype('float32'), name='observation', minimum=-3.4028234663852886e+38, maximum=3.4028234663852886e+38)\ntime_step_spec.step_type: ArraySpec(shape=(), dtype=dtype('int32'), name='step_type')\ntime_step_spec.discount: BoundedArraySpec(shape=(), dtype=dtype('float32'), name='discount', minimum=0.0, maximum=1.0)\ntime_step_spec.reward: ArraySpec(shape=(), dtype=dtype('float32'), name='reward')\n"
]
}
],
"source": [
"print('action_spec:', environment.action_spec())\n",
"print('time_step_spec.observation:', environment.time_step_spec().observation)\n",
"print('time_step_spec.step_type:', environment.time_step_spec().step_type)\n",
"print('time_step_spec.discount:', environment.time_step_spec().discount)\n",
"print('time_step_spec.reward:', environment.time_step_spec().reward)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{2881}\nTimeStep(\n{'discount': array(1., dtype=float32),\n 'observation': array([ 30.842302 , 235.31091 , 32.608276 , 78.42818 ,\n 10.868206 , 100. , 14.285714 , 85.71429 ,\n 104.508354 , 63.399635 , 184.45976 , 68.50428 ,\n 0.9517862 , 35.248367 , 58.747757 , 58.747757 ,\n 93.37857 , 66.666664 , 100. , 44.003708 ,\n 34.621227 , 9.3824835 , 65.05567 , 0.61875534],\n dtype=float32),\n 'reward': array(0., dtype=float32),\n 'step_type': array(0, dtype=int32)})\ncumulative_reward 0.0\n-------------\n-----------------\nFinal Reward = -0.42619243\n\n---------------------------------------------------------------\n**Every time you run this block final reward should change.**\n---------------------------------------------------------------\n"
]
}
],
"source": [
"get_skip_action = np.array(0, dtype=np.int32)\n",
"get_buy_action = np.array(1, dtype=np.int32)\n",
"end_round_action = np.array(2, dtype=np.int32)\n",
"\n",
"environment = StockMarketEnvironment(config)\n",
"time_step = environment.reset()\n",
"print(time_step)\n",
"cumulative_reward = time_step.reward\n",
"print('cumulative_reward', cumulative_reward)\n",
"print('-------------')\n",
"\n",
"#Play for 12 steps:\n",
"time_step = environment.step(get_buy_action)\n",
"#print(time_step)\n",
"cumulative_reward += time_step.reward\n",
"#print('cumulative_reward', cumulative_reward)\n",
"\n",
"for _ in range(600):\n",
" time_step = environment.step(get_skip_action)\n",
" #print(time_step)\n",
" cumulative_reward += time_step.reward\n",
" #print('cumulative_reward', cumulative_reward)\n",
"\n",
"print('-----------------')\n",
"time_step = environment.step(end_round_action)\n",
"#print(time_step)\n",
"cumulative_reward += time_step.reward\n",
"print('Final Reward = ', cumulative_reward)\n",
"\n",
"\n",
"print('\\n---------------------------------------------------------------')\n",
"print('**Every time you run this block final reward should change.**')\n",
"print('---------------------------------------------------------------')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"True\n...................\nTimeStep Specs: TimeStep(\n{'discount': BoundedTensorSpec(shape=(), dtype=tf.float32, name='discount', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32)),\n 'observation': BoundedTensorSpec(shape=(24,), dtype=tf.float32, name='observation', minimum=array(-3.4028235e+38, dtype=float32), maximum=array(3.4028235e+38, dtype=float32)),\n 'reward': TensorSpec(shape=(), dtype=tf.float32, name='reward'),\n 'step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type')})\n...................\nAction Specs: BoundedTensorSpec(shape=(), dtype=tf.int32, name='action', minimum=array(0, dtype=int32), maximum=array(2, dtype=int32))\n"
]
}
],
"source": [
"tf_env = tf_py_environment.TFPyEnvironment(environment)\n",
"\n",
"print(isinstance(tf_env, tf_environment.TFEnvironment))\n",
"print('...................')\n",
"print(\"TimeStep Specs:\", tf_env.time_step_spec())\n",
"print('...................')\n",
"print(\"Action Specs:\", tf_env.action_spec())"
]
},
{
"source": [
"## Training"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from __future__ import absolute_import, division, print_function\n",
"\n",
"import base64\n",
"import imageio\n",
"import IPython\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import PIL.Image\n",
"import pyvirtualdisplay\n",
"\n",
"import tensorflow as tf\n",
"\n",
"from tf_agents.agents.dqn import dqn_agent\n",
"from tf_agents.drivers import dynamic_step_driver\n",
"from tf_agents.environments import suite_gym\n",
"from tf_agents.environments import tf_py_environment\n",
"from tf_agents.eval import metric_utils\n",
"from tf_agents.metrics import tf_metrics\n",
"from tf_agents.networks import q_network\n",
"from tf_agents.policies import random_tf_policy\n",
"from tf_agents.replay_buffers import tf_uniform_replay_buffer\n",
"from tf_agents.trajectories import trajectory\n",
"from tf_agents.utils import common\n",
"tf.compat.v1.enable_v2_behavior()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"num_iterations = 100000 # @param {type:\"integer\"}\n",
"\n",
"initial_collect_steps = 1000 # @param {type:\"integer\"} \n",
"collect_steps_per_iteration = 2 # @param {type:\"integer\"}\n",
"replay_buffer_max_length = 100000 # @param {type:\"integer\"}\n",
"\n",
"batch_size = 64 # @param {type:\"integer\"}\n",
"learning_rate = 1e-3 # @param {type:\"number\"}\n",
"log_interval = 500 # @param {type:\"integer\"}\n",
"\n",
"num_eval_episodes = 10 # @param {type:\"integer\"}\n",
"eval_interval = 5000 # @param {type:\"integer\"}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{2881}\n",
"{2881}\n"
]
}
],
"source": [
"train_env = tf_py_environment.TFPyEnvironment(StockMarketEnvironment(config))\n",
"eval_env = tf_py_environment.TFPyEnvironment(StockMarketEnvironment(config))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"fc_layer_params = (40,)\n",
"\n",
"q_net = q_network.QNetwork(\n",
" train_env.observation_spec(),\n",
" train_env.action_spec(),\n",
" fc_layer_params=fc_layer_params)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)\n",
"\n",
"train_step_counter = tf.Variable(0)\n",
"\n",
"agent = dqn_agent.DqnAgent(\n",
" train_env.time_step_spec(),\n",
" train_env.action_spec(),\n",
" q_network=q_net,\n",
" optimizer=optimizer,\n",
" td_errors_loss_fn=common.element_wise_squared_loss,\n",
" train_step_counter=train_step_counter)\n",
"\n",
"agent.initialize()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "AttributeError",
"evalue": "'DqnAgent' object has no attribute 'predict'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-44-1ee9a2fce497>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0magent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'DqnAgent' object has no attribute 'predict'"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"eval_policy = agent.policy\n",
"collect_policy = agent.collect_policy\n",
"random_policy = random_tf_policy.RandomTFPolicy(train_env.time_step_spec(),\n",
" train_env.action_spec())"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "ValueError",
"evalue": "Input 0 of layer dense_1 is incompatible with the layer: : expected min_ndim=2, found ndim=1. Full shape received: (40,)",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-55-2aa37233016b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0magent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpolicy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime_step\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/policies/tf_policy.py\u001b[0m in \u001b[0;36maction\u001b[0;34m(self, time_step, policy_state, seed)\u001b[0m\n\u001b[1;32m 322\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_automatic_state_reset\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 323\u001b[0m \u001b[0mpolicy_state\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_reset_state\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime_step\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolicy_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 324\u001b[0;31m \u001b[0mstep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maction_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime_step\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtime_step\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolicy_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpolicy_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 325\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 326\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclip_action\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction_spec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/utils/common.py\u001b[0m in \u001b[0;36mwith_check_resource_vars\u001b[0;34m(*fn_args, **fn_kwargs)\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;31m# We're either in eager mode or in tf.function mode (no in-between); so\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;31m# autodep-like behavior is already expected of fn.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 186\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mfn_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfn_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 187\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mresource_variables_enabled\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mMISSING_RESOURCE_VARIABLES_ERROR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/policies/tf_policy.py\u001b[0m in \u001b[0;36m_action\u001b[0;34m(self, time_step, policy_state, seed)\u001b[0m\n\u001b[1;32m 558\u001b[0m \"\"\"\n\u001b[1;32m 559\u001b[0m \u001b[0mseed_stream\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSeedStream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msalt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'tf_agents_tf_policy'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 560\u001b[0;31m \u001b[0mdistribution_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_distribution\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime_step\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolicy_state\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pytype: disable=wrong-arg-types\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 561\u001b[0m actions = tf.nest.map_structure(\n\u001b[1;32m 562\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mreparameterized_sampling\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mseed_stream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/policies/greedy_policy.py\u001b[0m in \u001b[0;36m_distribution\u001b[0;34m(self, time_step, policy_state)\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mDeterministicWithLogProb\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgreedy_action\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m distribution_step = self._wrapped_policy.distribution(\n\u001b[0m\u001b[1;32m 81\u001b[0m time_step, policy_state)\n\u001b[1;32m 82\u001b[0m return policy_step.PolicyStep(\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/policies/tf_policy.py\u001b[0m in \u001b[0;36mdistribution\u001b[0;34m(self, time_step, policy_state)\u001b[0m\n\u001b[1;32m 401\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_automatic_state_reset\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 402\u001b[0m \u001b[0mpolicy_state\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_reset_state\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime_step\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolicy_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 403\u001b[0;31m \u001b[0mstep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_distribution\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime_step\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtime_step\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolicy_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpolicy_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 404\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0memit_log_probability\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 405\u001b[0m \u001b[0;31m# This here is set only for compatibility with info_spec in constructor.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/policies/q_policy.py\u001b[0m in \u001b[0;36m_distribution\u001b[0;34m(self, time_step, policy_state)\u001b[0m\n\u001b[1;32m 151\u001b[0m network_observation)\n\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 153\u001b[0;31m q_values, policy_state = self._q_network(\n\u001b[0m\u001b[1;32m 154\u001b[0m \u001b[0mnetwork_observation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnetwork_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpolicy_state\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 155\u001b[0m step_type=time_step.step_type)\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/networks/network.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs, *args, **kwargs)\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0mnormalized_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"network_state\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 417\u001b[0;31m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_state\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mNetwork\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mnormalized_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 418\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 419\u001b[0m nest_utils.assert_matching_dtypes_and_inner_shapes(\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1010\u001b[0m with autocast_variable.enable_auto_cast_variables(\n\u001b[1;32m 1011\u001b[0m self._compute_dtype_object):\n\u001b[0;32m-> 1012\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcall_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1013\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1014\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_activity_regularizer\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/networks/q_network.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, observation, step_type, network_state, training)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0mobservation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstep_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstep_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnetwork_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnetwork_state\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m training=training)\n\u001b[0;32m--> 148\u001b[0;31m \u001b[0mq_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_q_value_layer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtraining\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtraining\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mq_value\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnetwork_state\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/keras/engine/base_layer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 996\u001b[0m \u001b[0minputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 997\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 998\u001b[0;31m \u001b[0minput_spec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massert_input_compatibility\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minput_spec\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 999\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0meager\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1000\u001b[0m \u001b[0mcall_fn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/keras/engine/input_spec.py\u001b[0m in \u001b[0;36massert_input_compatibility\u001b[0;34m(input_spec, inputs, layer_name)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0mndim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrank\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mndim\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mndim\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin_ndim\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 234\u001b[0;31m raise ValueError('Input ' + str(input_index) + ' of layer ' +\n\u001b[0m\u001b[1;32m 235\u001b[0m \u001b[0mlayer_name\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' is incompatible with the layer: '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 236\u001b[0m \u001b[0;34m': expected min_ndim='\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin_ndim\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Input 0 of layer dense_1 is incompatible with the layer: : expected min_ndim=2, found ndim=1. Full shape received: (40,)"
]
}
],
"source": [
"from tf_agents.trajectories import time_step as ts\n",
"\n",
"\n",
"agent.policy.action"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def compute_avg_return(environment, policy, num_episodes=10):\n",
"\n",
" total_return = 0.0\n",
" for _ in range(num_episodes):\n",
"\n",
" time_step = environment.reset()\n",
" episode_return = 0.0\n",
"\n",
" while not time_step.is_last():\n",
" action_step = policy.action(time_step)\n",
" time_step = environment.step(action_step.action)\n",
" episode_return += time_step.reward\n",
" total_return += episode_return\n",
"\n",
" avg_return = total_return / num_episodes\n",
" return avg_return.numpy()[0]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "NameError",
"evalue": "name 'random_policy' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-3b7f8579967a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcompute_avg_return\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0meval_env\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_policy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_eval_episodes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'random_policy' is not defined"
]
}
],
"source": [
"compute_avg_return(eval_env, random_policy, num_eval_episodes)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(\n",
" data_spec=agent.collect_data_spec,\n",
" batch_size=train_env.batch_size,\n",
" max_length=replay_buffer_max_length)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Trajectory(\n",
"{'action': BoundedTensorSpec(shape=(), dtype=tf.int32, name='action', minimum=array(0, dtype=int32), maximum=array(2, dtype=int32)),\n",
" 'discount': BoundedTensorSpec(shape=(), dtype=tf.float32, name='discount', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32)),\n",
" 'next_step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type'),\n",
" 'observation': BoundedTensorSpec(shape=(24,), dtype=tf.float32, name='observation', minimum=array(-3.4028235e+38, dtype=float32), maximum=array(3.4028235e+38, dtype=float32)),\n",
" 'policy_info': (),\n",
" 'reward': TensorSpec(shape=(), dtype=tf.float32, name='reward'),\n",
" 'step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type')})"
]
},
"metadata": {},
"execution_count": 15
}
],
"source": [
"agent.collect_data_spec"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"('step_type',\n",
" 'observation',\n",
" 'action',\n",
" 'policy_info',\n",
" 'next_step_type',\n",
" 'reward',\n",
" 'discount')"
]
},
"metadata": {},
"execution_count": 16
}
],
"source": [
"agent.collect_data_spec._fields"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "NameError",
"evalue": "name 'random_policy' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-17-649854ab5b42>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mcollect_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolicy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mcollect_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_env\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_policy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreplay_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msteps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'random_policy' is not defined"
]
}
],
"source": [
"def collect_step(environment, policy, buffer):\n",
" time_step = environment.current_time_step()\n",
" #print(time_step)\n",
" action_step = policy.action(time_step)\n",
" #print(action_step.action.numpy())\n",
" next_time_step = environment.step(action_step.action)\n",
" #print(next_time_step)\n",
" traj = trajectory.from_transition(time_step, action_step, next_time_step)\n",
"\n",
" # Add trajectory to the replay buffer\n",
" buffer.add_batch(traj)\n",
"\n",
"def collect_data(env, policy, buffer, steps):\n",
" for _ in range(steps):\n",
" collect_step(env, policy, buffer)\n",
"\n",
"collect_data(train_env, random_policy, replay_buffer, steps=1)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"WARNING:tensorflow:AutoGraph could not transform <bound method ReplayBuffer.get_next of <tf_agents.replay_buffers.tf_uniform_replay_buffer.TFUniformReplayBuffer object at 0x7f4f8c3c5730>> and will run it as-is.\n",
"Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n",
"Cause: module 'gast' has no attribute 'Index'\n",
"To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n",
"WARNING: AutoGraph could not transform <bound method ReplayBuffer.get_next of <tf_agents.replay_buffers.tf_uniform_replay_buffer.TFUniformReplayBuffer object at 0x7f4f8c3c5730>> and will run it as-is.\n",
"Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n",
"Cause: module 'gast' has no attribute 'Index'\n",
"To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n",
"WARNING:tensorflow:From /home/ege/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/replay_buffers/tf_uniform_replay_buffer.py:338: ReplayBuffer.get_next (from tf_agents.replay_buffers.replay_buffer) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use `as_dataset(..., single_deterministic_pass=False) instead.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<PrefetchDataset shapes: (Trajectory(\n",
"{action: (64, 2),\n",
" discount: (64, 2),\n",
" next_step_type: (64, 2),\n",
" observation: (64, 2, 24),\n",
" policy_info: (),\n",
" reward: (64, 2),\n",
" step_type: (64, 2)}), BufferInfo(ids=(64, 2), probabilities=(64,))), types: (Trajectory(\n",
"{action: tf.int32,\n",
" discount: tf.float32,\n",
" next_step_type: tf.int32,\n",
" observation: tf.float32,\n",
" policy_info: (),\n",
" reward: tf.float32,\n",
" step_type: tf.int32}), BufferInfo(ids=tf.int64, probabilities=tf.float32))>"
]
},
"metadata": {},
"execution_count": 18
}
],
"source": [
"# Dataset generates trajectories with shape [Bx2x...]\n",
"dataset = replay_buffer.as_dataset(\n",
" num_parallel_calls=3, \n",
" sample_batch_size=batch_size, \n",
" num_steps=2).prefetch(3)\n",
"\n",
"\n",
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<tensorflow.python.data.ops.iterator_ops.OwnedIterator object at 0x7f4f74290c40>\n"
]
}
],
"source": [
"iterator = iter(dataset)\n",
"\n",
"print(iterator)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"WARNING:tensorflow:From /home/ege/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py:201: calling foldr_v2 (from tensorflow.python.ops.functional_ops) with back_prop=False is deprecated and will be removed in a future version.\nInstructions for updating:\nback_prop=False is deprecated. Consider using tf.stop_gradient instead.\nInstead of:\nresults = tf.foldr(fn, elems, back_prop=False)\nUse:\nresults = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))\n"
]
},
{
"output_type": "error",
"ename": "NotImplementedError",
"evalue": "Cannot convert a symbolic Tensor (loss/meshgrid/Size:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-20-106bfe411d92>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;31m# Sample a batch of data from the buffer and update the agent's network.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mexperience\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munused_info\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m \u001b[0mtrain_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexperience\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mstep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_step_counter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 826\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 827\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtrace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTrace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_name\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtm\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 828\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 829\u001b[0m \u001b[0mcompiler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"xla\"\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_experimental_compile\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m\"nonXla\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 830\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 869\u001b[0m \u001b[0;31m# This is the first call of __call__, so we have to initialize.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[0minitializers\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 871\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_initialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0madd_initializers_to\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minitializers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 872\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 873\u001b[0m \u001b[0;31m# At this point we know that the initialization is complete (or less\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_initialize\u001b[0;34m(self, args, kwds, add_initializers_to)\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_graph_deleter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mFunctionDeleter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lifted_initializer_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 724\u001b[0m self._concrete_stateful_fn = (\n\u001b[0;32m--> 725\u001b[0;31m self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access\n\u001b[0m\u001b[1;32m 726\u001b[0m *args, **kwds))\n\u001b[1;32m 727\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_get_concrete_function_internal_garbage_collected\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2967\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2968\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2969\u001b[0;31m \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2970\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2971\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_maybe_define_function\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m 3359\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3360\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_function_cache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmissed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcall_context_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3361\u001b[0;31m \u001b[0mgraph_function\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_graph_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3362\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_function_cache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprimary\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_create_graph_function\u001b[0;34m(self, args, kwargs, override_flat_arg_shapes)\u001b[0m\n\u001b[1;32m 3194\u001b[0m \u001b[0marg_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbase_arg_names\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mmissing_arg_names\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3195\u001b[0m graph_function = ConcreteFunction(\n\u001b[0;32m-> 3196\u001b[0;31m func_graph_module.func_graph_from_py_func(\n\u001b[0m\u001b[1;32m 3197\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3198\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_python_function\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py\u001b[0m in \u001b[0;36mfunc_graph_from_py_func\u001b[0;34m(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)\u001b[0m\n\u001b[1;32m 988\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moriginal_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_decorator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munwrap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpython_func\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 989\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 990\u001b[0;31m \u001b[0mfunc_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpython_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mfunc_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfunc_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 991\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 992\u001b[0m \u001b[0;31m# invariant: `func_outputs` contains only Tensors, CompositeTensors,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36mwrapped_fn\u001b[0;34m(*args, **kwds)\u001b[0m\n\u001b[1;32m 632\u001b[0m \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 633\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 634\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweak_wrapped_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__wrapped__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 635\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 636\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/agents/tf_agent.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, experience, weights, **kwargs)\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_enable_functions\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 331\u001b[0;31m loss_info = self._train_fn(\n\u001b[0m\u001b[1;32m 332\u001b[0m experience=experience, weights=weights, **kwargs)\n\u001b[1;32m 333\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/utils/common.py\u001b[0m in \u001b[0;36mwith_check_resource_vars\u001b[0;34m(*fn_args, **fn_kwargs)\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;31m# We're either in eager mode or in tf.function mode (no in-between); so\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;31m# autodep-like behavior is already expected of fn.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 186\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mfn_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfn_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 187\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mresource_variables_enabled\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mMISSING_RESOURCE_VARIABLES_ERROR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/agents/dqn/dqn_agent.py\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, experience, weights)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexperience\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGradientTape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtape\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 388\u001b[0;31m loss_info = self._loss(\n\u001b[0m\u001b[1;32m 389\u001b[0m \u001b[0mexperience\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0mtd_errors_loss_fn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_td_errors_loss_fn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/agents/dqn/dqn_agent.py\u001b[0m in \u001b[0;36m_loss\u001b[0;34m(self, experience, td_errors_loss_fn, gamma, reward_scale_factor, weights, training)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_scope\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'loss'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m \u001b[0mq_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compute_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime_steps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtraining\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtraining\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 459\u001b[0m next_q_values = self._compute_next_q_values(\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/agents/dqn/dqn_agent.py\u001b[0m in \u001b[0;36m_compute_q_values\u001b[0;34m(self, time_steps, actions, training)\u001b[0m\n\u001b[1;32m 537\u001b[0m \u001b[0maction_spec\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_spec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mBoundedTensorSpec\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_action_spec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[0mmulti_dim_actions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maction_spec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrank\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 539\u001b[0;31m return common.index_with_actions(\n\u001b[0m\u001b[1;32m 540\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 541\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/utils/common.py\u001b[0m in \u001b[0;36mindex_with_actions\u001b[0;34m(q_values, actions, multi_dim_actions)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 385\u001b[0m \u001b[0mouter_shape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 386\u001b[0;31m batch_indices = tf.meshgrid(\n\u001b[0m\u001b[1;32m 387\u001b[0m *[tf.range(outer_shape[i]) for i in range(batch_dims)], indexing='ij')\n\u001b[1;32m 388\u001b[0m batch_indices = [tf.cast(tf.expand_dims(batch_index, -1), dtype=tf.int32)\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;34m\"\"\"Call target, and fall back on dispatchers if there is a TypeError.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mTypeError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;31m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/ops/array_ops.py\u001b[0m in \u001b[0;36mmeshgrid\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 3550\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3551\u001b[0m \u001b[0;31m# TODO(nolivia): improve performance with a broadcast\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3552\u001b[0;31m \u001b[0mmult_fact\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshapes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3553\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mmult_fact\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3554\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;34m\"\"\"Call target, and fall back on dispatchers if there is a TypeError.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mTypeError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;31m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/ops/array_ops.py\u001b[0m in \u001b[0;36mones\u001b[0;34m(shape, dtype, name)\u001b[0m\n\u001b[1;32m 3118\u001b[0m \u001b[0;31m# Create a constant if it won't be very big. Otherwise create a fill\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3119\u001b[0m \u001b[0;31m# op to prevent serialized GraphDefs from becoming too large.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3120\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_constant_if_small\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3121\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3122\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/ops/array_ops.py\u001b[0m in \u001b[0;36m_constant_if_small\u001b[0;34m(value, shape, dtype, name)\u001b[0m\n\u001b[1;32m 2802\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_constant_if_small\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2803\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2804\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2805\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconstant\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2806\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<__array_function__ internals>\u001b[0m in \u001b[0;36mprod\u001b[0;34m(*args, **kwargs)\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mprod\u001b[0;34m(a, axis, dtype, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 3049\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3050\u001b[0m \"\"\"\n\u001b[0;32m-> 3051\u001b[0;31m return _wrapreduction(a, np.multiply, 'prod', axis, dtype, out,\n\u001b[0m\u001b[1;32m 3052\u001b[0m keepdims=keepdims, initial=initial, where=where)\n\u001b[1;32m 3053\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36m_wrapreduction\u001b[0;34m(obj, ufunc, method, axis, dtype, out, **kwargs)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mreduction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpasskwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mufunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpasskwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36m__array__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 851\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__array__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 852\u001b[0;31m raise NotImplementedError(\n\u001b[0m\u001b[1;32m 853\u001b[0m \u001b[0;34m\"Cannot convert a symbolic Tensor ({}) to a numpy array.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 854\u001b[0m \u001b[0;34m\" This error may indicate that you're trying to pass a Tensor to\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNotImplementedError\u001b[0m: Cannot convert a symbolic Tensor (loss/meshgrid/Size:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported"
]
}
],
"source": [
"\n",
"try:\n",
" %%time\n",
"except:\n",
" pass\n",
"\n",
"# (Optional) Optimize by wrapping some of the code in a graph using TF function.\n",
"agent.train = common.function(agent.train)\n",
"\n",
"# Reset the train step\n",
"agent.train_step_counter.assign(0)\n",
"\n",
"# Evaluate the agent's policy once before training.\n",
"avg_return = compute_avg_return(eval_env, agent.policy, num_eval_episodes)\n",
"returns = [avg_return]\n",
"\n",
"for _ in range(num_iterations):\n",
"\n",
" # Collect a few steps using collect_policy and save to the replay buffer.\n",
" for _ in range(collect_steps_per_iteration):\n",
" collect_step(train_env, agent.collect_policy, replay_buffer)\n",
"\n",
" # Sample a batch of data from the buffer and update the agent's network.\n",
" experience, unused_info = next(iterator)\n",
" train_loss = agent.train(experience).loss\n",
"\n",
" step = agent.train_step_counter.numpy()\n",
"\n",
" if step % log_interval == 0:\n",
" print('step = {0}: loss = {1}'.format(step, train_loss))\n",
"\n",
" if step % eval_interval == 0:\n",
" avg_return = compute_avg_return(eval_env, agent.policy, num_eval_episodes)\n",
" print('step = {0}: Average Return = {1}'.format(step, avg_return))\n",
" returns.append(avg_return)"
]
},
{
"source": [
"## Tutorial Trial"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"from __future__ import absolute_import, division, print_function\n",
"\n",
"import base64\n",
"import imageio\n",
"import IPython\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import PIL.Image\n",
"import pyvirtualdisplay\n",
"\n",
"import tensorflow as tf\n",
"\n",
"from tf_agents.agents.dqn import dqn_agent\n",
"from tf_agents.environments import suite_gym\n",
"from tf_agents.environments import tf_py_environment\n",
"from tf_agents.eval import metric_utils\n",
"from tf_agents.metrics import tf_metrics\n",
"from tf_agents.networks import sequential\n",
"from tf_agents.policies import random_tf_policy\n",
"from tf_agents.replay_buffers import tf_uniform_replay_buffer\n",
"from tf_agents.trajectories import trajectory\n",
"from tf_agents.specs import tensor_spec\n",
"from tf_agents.utils import common"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"fc_layer_params = (100, 50)\n",
"action_tensor_spec = tensor_spec.from_spec(environment.action_spec())\n",
"num_actions = action_tensor_spec.maximum - action_tensor_spec.minimum + 1\n",
"\n",
"# Define a helper function to create Dense layers configured with the right\n",
"# activation and kernel initializer.\n",
"def dense_layer(num_units):\n",
" return tf.keras.layers.Dense(\n",
" num_units,\n",
" activation=tf.keras.activations.relu,\n",
" kernel_initializer=tf.keras.initializers.VarianceScaling(\n",
" scale=2.0, mode='fan_in', distribution='truncated_normal'))\n",
"\n",
"# QNetwork consists of a sequence of Dense layers followed by a dense layer\n",
"# with `num_actions` units to generate one q_value per available action as\n",
"# it's output.\n",
"dense_layers = [dense_layer(num_units) for num_units in fc_layer_params]\n",
"q_values_layer = tf.keras.layers.Dense(\n",
" num_actions,\n",
" activation=None,\n",
" kernel_initializer=tf.keras.initializers.RandomUniform(\n",
" minval=-0.03, maxval=0.03),\n",
" bias_initializer=tf.keras.initializers.Constant(-0.2))\n",
"q_net = sequential.Sequential(dense_layers + [q_values_layer])"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n",
"\n",
"train_step_counter = tf.Variable(0)\n",
"\n",
"agent = dqn_agent.DqnAgent(\n",
" train_env.time_step_spec(),\n",
" train_env.action_spec(),\n",
" q_network=q_net,\n",
" optimizer=optimizer,\n",
" td_errors_loss_fn=common.element_wise_squared_loss,\n",
" train_step_counter=train_step_counter)\n",
"\n",
"agent.initialize()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"eval_policy = agent.policy\n",
"collect_policy = agent.collect_policy\n",
"random_policy = random_tf_policy.RandomTFPolicy(train_env.time_step_spec(),\n",
" train_env.action_spec())"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"#@test {\"skip\": true}\n",
"def compute_avg_return(environment, policy, num_episodes=10):\n",
"\n",
" total_return = 0.0\n",
" for _ in range(num_episodes):\n",
"\n",
" time_step = environment.reset()\n",
" episode_return = 0.0\n",
"\n",
" while not time_step.is_last():\n",
" action_step = policy.action(time_step)\n",
" time_step = environment.step(action_step.action)\n",
" episode_return += time_step.reward\n",
" total_return += episode_return\n",
"\n",
" avg_return = total_return / num_episodes\n",
" return avg_return.numpy()[0]"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"-0.5704178"
]
},
"metadata": {},
"execution_count": 49
}
],
"source": [
"compute_avg_return(eval_env, random_policy, num_eval_episodes)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(\n",
" data_spec=agent.collect_data_spec,\n",
" batch_size=train_env.batch_size,\n",
" max_length=replay_buffer_max_length)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"#@test {\"skip\": true}\n",
"def collect_step(environment, policy, buffer):\n",
" time_step = environment.current_time_step()\n",
" action_step = policy.action(time_step)\n",
" next_time_step = environment.step(action_step.action)\n",
" traj = trajectory.from_transition(time_step, action_step, next_time_step)\n",
"\n",
" # Add trajectory to the replay buffer\n",
" buffer.add_batch(traj)\n",
"\n",
"def collect_data(env, policy, buffer, steps):\n",
" for _ in range(steps):\n",
" collect_step(env, policy, buffer)\n",
"\n",
"collect_data(train_env, random_policy, replay_buffer, initial_collect_steps)\n",
"\n",
"# This loop is so common in RL, that we provide standard implementations. \n",
"# For more details see tutorial 4 or the drivers module.\n",
"# https://github.com/tensorflow/agents/blob/master/docs/tutorials/4_drivers_tutorial.ipynb \n",
"# https://www.tensorflow.org/agents/api_docs/python/tf_agents/drivers"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# Dataset generates trajectories with shape [Bx2x...]\n",
"dataset = replay_buffer.as_dataset(\n",
" num_parallel_calls=3, \n",
" sample_batch_size=batch_size, \n",
" num_steps=2).prefetch(3)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<tensorflow.python.data.ops.iterator_ops.OwnedIterator object at 0x7fce401d58b0>\n"
]
}
],
"source": [
"iterator = iter(dataset)\n",
"print(iterator)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "NotImplementedError",
"evalue": "Cannot convert a symbolic Tensor (loss/meshgrid/Size:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-54-013f8468e6ef>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;31m# Sample a batch of data from the buffer and update the agent's network.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mexperience\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munused_info\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m \u001b[0mtrain_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexperience\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mstep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0magent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_step_counter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 826\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 827\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtrace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTrace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_name\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtm\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 828\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 829\u001b[0m \u001b[0mcompiler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"xla\"\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_experimental_compile\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m\"nonXla\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 830\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 869\u001b[0m \u001b[0;31m# This is the first call of __call__, so we have to initialize.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[0minitializers\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 871\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_initialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0madd_initializers_to\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minitializers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 872\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 873\u001b[0m \u001b[0;31m# At this point we know that the initialization is complete (or less\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_initialize\u001b[0;34m(self, args, kwds, add_initializers_to)\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_graph_deleter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mFunctionDeleter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lifted_initializer_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 724\u001b[0m self._concrete_stateful_fn = (\n\u001b[0;32m--> 725\u001b[0;31m self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access\n\u001b[0m\u001b[1;32m 726\u001b[0m *args, **kwds))\n\u001b[1;32m 727\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_get_concrete_function_internal_garbage_collected\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2967\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2968\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2969\u001b[0;31m \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2970\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2971\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_maybe_define_function\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m 3359\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3360\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_function_cache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmissed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcall_context_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3361\u001b[0;31m \u001b[0mgraph_function\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_graph_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3362\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_function_cache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprimary\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcache_key\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_create_graph_function\u001b[0;34m(self, args, kwargs, override_flat_arg_shapes)\u001b[0m\n\u001b[1;32m 3194\u001b[0m \u001b[0marg_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbase_arg_names\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mmissing_arg_names\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3195\u001b[0m graph_function = ConcreteFunction(\n\u001b[0;32m-> 3196\u001b[0;31m func_graph_module.func_graph_from_py_func(\n\u001b[0m\u001b[1;32m 3197\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3198\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_python_function\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py\u001b[0m in \u001b[0;36mfunc_graph_from_py_func\u001b[0;34m(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)\u001b[0m\n\u001b[1;32m 988\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moriginal_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_decorator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munwrap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpython_func\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 989\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 990\u001b[0;31m \u001b[0mfunc_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpython_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mfunc_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfunc_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 991\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 992\u001b[0m \u001b[0;31m# invariant: `func_outputs` contains only Tensors, CompositeTensors,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36mwrapped_fn\u001b[0;34m(*args, **kwds)\u001b[0m\n\u001b[1;32m 632\u001b[0m \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 633\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 634\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweak_wrapped_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__wrapped__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 635\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 636\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/agents/tf_agent.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, experience, weights, **kwargs)\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_enable_functions\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 331\u001b[0;31m loss_info = self._train_fn(\n\u001b[0m\u001b[1;32m 332\u001b[0m experience=experience, weights=weights, **kwargs)\n\u001b[1;32m 333\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/utils/common.py\u001b[0m in \u001b[0;36mwith_check_resource_vars\u001b[0;34m(*fn_args, **fn_kwargs)\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;31m# We're either in eager mode or in tf.function mode (no in-between); so\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;31m# autodep-like behavior is already expected of fn.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 186\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mfn_args\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfn_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 187\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mresource_variables_enabled\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mMISSING_RESOURCE_VARIABLES_ERROR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/agents/dqn/dqn_agent.py\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, experience, weights)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexperience\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGradientTape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtape\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 388\u001b[0;31m loss_info = self._loss(\n\u001b[0m\u001b[1;32m 389\u001b[0m \u001b[0mexperience\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0mtd_errors_loss_fn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_td_errors_loss_fn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/agents/dqn/dqn_agent.py\u001b[0m in \u001b[0;36m_loss\u001b[0;34m(self, experience, td_errors_loss_fn, gamma, reward_scale_factor, weights, training)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_scope\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'loss'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m \u001b[0mq_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compute_q_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime_steps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtraining\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtraining\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 459\u001b[0m next_q_values = self._compute_next_q_values(\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/agents/dqn/dqn_agent.py\u001b[0m in \u001b[0;36m_compute_q_values\u001b[0;34m(self, time_steps, actions, training)\u001b[0m\n\u001b[1;32m 537\u001b[0m \u001b[0maction_spec\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_spec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mBoundedTensorSpec\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_action_spec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[0mmulti_dim_actions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maction_spec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrank\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 539\u001b[0;31m return common.index_with_actions(\n\u001b[0m\u001b[1;32m 540\u001b[0m \u001b[0mq_values\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 541\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tf_agents/utils/common.py\u001b[0m in \u001b[0;36mindex_with_actions\u001b[0;34m(q_values, actions, multi_dim_actions)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 385\u001b[0m \u001b[0mouter_shape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 386\u001b[0;31m batch_indices = tf.meshgrid(\n\u001b[0m\u001b[1;32m 387\u001b[0m *[tf.range(outer_shape[i]) for i in range(batch_dims)], indexing='ij')\n\u001b[1;32m 388\u001b[0m batch_indices = [tf.cast(tf.expand_dims(batch_index, -1), dtype=tf.int32)\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;34m\"\"\"Call target, and fall back on dispatchers if there is a TypeError.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mTypeError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;31m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/ops/array_ops.py\u001b[0m in \u001b[0;36mmeshgrid\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 3550\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3551\u001b[0m \u001b[0;31m# TODO(nolivia): improve performance with a broadcast\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3552\u001b[0;31m \u001b[0mmult_fact\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshapes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3553\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mmult_fact\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3554\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;34m\"\"\"Call target, and fall back on dispatchers if there is a TypeError.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mTypeError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;31m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/ops/array_ops.py\u001b[0m in \u001b[0;36mones\u001b[0;34m(shape, dtype, name)\u001b[0m\n\u001b[1;32m 3118\u001b[0m \u001b[0;31m# Create a constant if it won't be very big. Otherwise create a fill\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3119\u001b[0m \u001b[0;31m# op to prevent serialized GraphDefs from becoming too large.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3120\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_constant_if_small\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3121\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3122\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/ops/array_ops.py\u001b[0m in \u001b[0;36m_constant_if_small\u001b[0;34m(value, shape, dtype, name)\u001b[0m\n\u001b[1;32m 2802\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_constant_if_small\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2803\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2804\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2805\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconstant\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2806\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<__array_function__ internals>\u001b[0m in \u001b[0;36mprod\u001b[0;34m(*args, **kwargs)\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mprod\u001b[0;34m(a, axis, dtype, out, keepdims, initial, where)\u001b[0m\n\u001b[1;32m 3049\u001b[0m \u001b[0;34m...\u001b[0m \u001b[0;31m# total product 1*2*3 = 6\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3050\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3051\u001b[0;31m \u001b[0;34m>>\u001b[0m\u001b[0;34m>\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3052\u001b[0m \u001b[0;34m>>\u001b[0m\u001b[0;34m>\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcumprod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# specify type of output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3053\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m \u001b[0;36m1.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m6.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m24.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m120.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m720.\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36m_wrapreduction\u001b[0;34m(obj, ufunc, method, axis, dtype, out, **kwargs)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mreduction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpasskwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mufunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpasskwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/gym_2/lib/python3.9/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36m__array__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 851\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__array__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 852\u001b[0;31m raise NotImplementedError(\n\u001b[0m\u001b[1;32m 853\u001b[0m \u001b[0;34m\"Cannot convert a symbolic Tensor ({}) to a numpy array.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 854\u001b[0m \u001b[0;34m\" This error may indicate that you're trying to pass a Tensor to\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNotImplementedError\u001b[0m: Cannot convert a symbolic Tensor (loss/meshgrid/Size:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported"
]
}
],
"source": [
"#@test {\"skip\": true}\n",
"try:\n",
" %%time\n",
"except:\n",
" pass\n",
"\n",
"# (Optional) Optimize by wrapping some of the code in a graph using TF function.\n",
"agent.train = common.function(agent.train)\n",
"\n",
"# Reset the train step\n",
"agent.train_step_counter.assign(0)\n",
"\n",
"# Evaluate the agent's policy once before training.\n",
"avg_return = compute_avg_return(eval_env, agent.policy, num_eval_episodes)\n",
"returns = [avg_return]\n",
"\n",
"for _ in range(num_iterations):\n",
"\n",
" # Collect a few steps using collect_policy and save to the replay buffer.\n",
" collect_data(train_env, agent.collect_policy, replay_buffer, collect_steps_per_iteration)\n",
"\n",
" # Sample a batch of data from the buffer and update the agent's network.\n",
" experience, unused_info = next(iterator)\n",
" train_loss = agent.train(experience).loss\n",
"\n",
" step = agent.train_step_counter.numpy()\n",
"\n",
" if step % log_interval == 0:\n",
" print('step = {0}: loss = {1}'.format(step, train_loss))\n",
"\n",
" if step % eval_interval == 0:\n",
" avg_return = compute_avg_return(eval_env, agent.policy, num_eval_episodes)\n",
" print('step = {0}: Average Return = {1}'.format(step, avg_return))\n",
" returns.append(avg_return)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Package Version\n--------------------------------- -------------------\nabsl-py 0.12.0\naiodns 2.0.0\naiofiles 0.7.0\naiohttp 3.7.4.post0\nanyio 2.2.0\nAPScheduler 3.6.3\nargon2-cffi 20.1.0\narrow 1.1.1\nasgiref 3.4.1\nastor 0.8.1\nastroid 2.6.0\nastunparse 1.6.3\nasync-generator 1.10\nasync-timeout 3.0.1\natari-py 0.2.9\nattrs 21.2.0\nBabel 2.9.1\nbackcall 0.2.0\nbleach 3.3.0\nblinker 1.4\nblosc 1.10.4\nbrotlipy 0.7.0\ncachetools 4.2.2\nccxt 1.52.4\ncertifi 2021.5.30\ncffi 1.14.5\nchardet 3.0.4\nclick 8.0.1\ncloudpickle 1.6.0\ncolorama 0.4.4\ncoverage 5.5\ncryptography 3.4.7\ncycler 0.10.0\nCython 0.29.23\ndebugpy 1.3.0\ndecorator 5.0.9\ndefusedxml 0.7.1\ndm-tree 0.1.6\nEasyProcess 0.3\nentrypoints 0.3\nfastapi 0.65.2\nflatbuffers 20210226132247\ngast 0.4.0\ngin-config 0.4.0\ngoogle-auth 1.30.1\ngoogle-auth-oauthlib 0.4.4\ngoogle-pasta 0.2.0\ngreenlet 1.1.0\ngrpcio 1.36.1\ngym 0.18.3\ngym-anytrading 1.2.0\nh11 0.12.0\nh5py 2.10.0\nidna 2.10\nimageio 2.9.0\nimportlib-metadata 3.10.0\nipykernel 5.3.4\nipython 7.22.0\nipython-genutils 0.2.0\nipywidgets 7.6.3\nisort 5.9.1\njedi 0.17.2\nJinja2 3.0.1\njoblib 1.0.1\njson5 0.9.5\njsonschema 3.2.0\njupyter 1.0.0\njupyter-client 6.1.12\njupyter-console 6.4.0\njupyter-core 4.7.1\njupyter-server 1.4.1\njupyterlab 3.0.16\njupyterlab-pygments 0.1.2\njupyterlab-server 2.4.0\njupyterlab-widgets 1.0.0\nKeras-Preprocessing 1.1.2\nkiwisolver 1.3.1\nlazy-object-proxy 1.6.0\nlckr-jupyterlab-variableinspector 3.0.9\nMarkdown 3.3.4\nMarkupSafe 2.0.1\nmatplotlib 3.3.4\nmccabe 0.6.1\nmistune 0.8.4\nmkl-fft 1.3.0\nmkl-random 1.2.1\nmkl-service 2.3.0\nmplfinance 0.12.7a17\nmultidict 5.1.0\nnbclassic 0.2.6\nnbclient 0.5.3\nnbconvert 6.0.7\nnbformat 5.1.3\nnest-asyncio 1.5.1\nnodejs 0.1.1\nnotebook 6.4.0\nnumexpr 2.7.3\nnumpy 1.19.5\noauthlib 3.1.0\nolefile 0.46\nopencv-python 4.5.2.52\nopt-einsum 3.3.0\noptional-django 0.1.0\npackaging 20.9\npandas 1.2.5\npandocfilters 1.4.3\nparso 0.7.0\npexpect 4.8.0\npickleshare 0.7.5\nPillow 8.2.0\npip 21.1.1\nplotly 5.0.0\nprometheus-client 0.10.1\nprompt-toolkit 3.0.19\nprotobuf 3.14.0\nptyprocess 0.7.0\npy-find-1st 1.1.5\npyasn1 0.4.8\npyasn1-modules 0.2.8\npycares 4.0.0\npycoingecko 2.2.0\npycparser 2.20\npydantic 1.8.2\npyglet 1.5.15\nPygments 2.9.0\nPyJWT 2.1.0\npylint 2.8.3\npyOpenSSL 20.0.1\npyparsing 2.4.7\npyrsistent 0.17.3\nPySocks 1.7.1\npython-dateutil 2.8.1\npython-rapidjson 1.4\npython-telegram-bot 13.6\npytz 2021.1\nPyVirtualDisplay 2.2\npyzmq 20.0.0\nqtconsole 5.0.3\nQtPy 1.9.0\nquestionary 1.9.0\nrequests 2.25.1\nrequests-oauthlib 1.3.0\nrsa 4.7.2\nscipy 1.6.2\nsdnotify 0.3.2\nSend2Trash 1.5.0\nsetuptools 52.0.0.post20210125\nsip 4.19.13\nsix 1.15.0\nsniffio 1.2.0\nSQLAlchemy 1.4.19\nstable-baselines 2.10.2\nstable-baselines3 1.0\nstarlette 0.14.2\nTA-Lib 0.4.20\ntables 3.6.1\ntabulate 0.8.9\ntechnical 1.3.0\ntenacity 7.0.0\ntensorboard 2.4.0\ntensorboard-plugin-wit 1.6.0\ntensorflow 2.4.1\ntensorflow-estimator 2.5.0\ntensorflow-probability 0.12.2\ntermcolor 1.1.0\nterminado 0.9.4\ntestpath 0.4.4\ntf-agents 0.8.0\ntoml 0.10.2\ntorch 1.8.1\ntornado 6.1\ntraitlets 5.0.5\ntyping-extensions 3.7.4.3\ntzlocal 2.1\nurllib3 1.26.6\nuvicorn 0.14.0\nwcwidth 0.2.5\nwebencodings 0.5.1\nWerkzeug 1.0.1\nwheel 0.36.2\nwidgetsnbextension 3.5.1\nwrapt 1.12.1\nyarl 1.6.3\nzipp 3.4.1\n"
]
}
],
"source": [
"!pip list numpy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment