The following are higher-quality versions of the TLP and TinyTLP datasets. The resolution is unchanged, but the images are much sharper and of higher quality.
https://drive.google.com/open?id=1mv0ULctCzGn4gzum_Sb6kwXwB2QjZwu1
| # monte carlo policy gradient algorithm | |
| # use neural network to decide the policy | |
| # from observations and rewards, update the parameters of the neural networks to optimize the policy | |
| import numpy as np | |
| import tensorflow as tf | |
| import gym | |
| from gym import wrappers | |
| # initialize constants |
| # hill climbing algorithm | |
| # generate a random configuration of the parameters, add small amount of noise to the parameters and evaluate the new parameter configuration | |
| # if new configuration is better than old one, discard the old one and accept the new one | |
| # optimize it for weighted sum | |
| # returns the net episode reward | |
| def get_episode_reward(env, observation, params): | |
| t = 0 | |
| net_reward = 0 | |
| while (t < 1000): |
| # random guessing algorithm | |
| # generate 10000 random configurations of the model's parameters and pick the one that achieves the best cumulative reward. | |
| # optimize it for weighted sum | |
| import gym | |
| from gym import wrappers | |
| import numpy as np | |
# Build the CartPole environment for the random-guessing experiment and wrap
# it in the (legacy) gym Monitor so episode statistics/videos are recorded
# under /tmp/cartpole-random-guessing; force=True clears any recordings left
# there by a previous run.
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-random-guessing', force=True)
| # monte carlo policy gradient algorithm | |
| # use neural network to decide the policy | |
| # from observations and rewards, update the parameters of the neural networks to optimize the policy | |
| import numpy as np | |
| import tensorflow as tf | |
| import gym | |
| from gym import wrappers | |
| # initialize constants |
| # simple neural network implementation of qlearning | |
| import gym | |
| from gym import wrappers | |
| import numpy as np | |
| import tensorflow as tf | |
| # build environment | |
| env = gym.make("FrozenLake-v0") | |
| env = wrappers.Monitor(env, '/tmp/frozenlake-qlearning', force=True) | |
| n_obv = env.observation_space.n |
The following are higher-quality versions of the TLP and TinyTLP datasets. The resolution is unchanged, but the images are much sharper and of higher quality.
https://drive.google.com/open?id=1mv0ULctCzGn4gzum_Sb6kwXwB2QjZwu1
%% This part goes in the preamble.
%%
%% \dummyfig{<caption>} — draws a centered, framed placeholder box
%% (0.5\textwidth wide, 0.33\textheight tall) containing the caption
%% text, for use inside a figure environment while the real figure is
%% still pending.
%%
%% Fix: the original wrote \centering{#1}. \centering is a declaration,
%% not a one-argument command, and the group {#1} closes before any
%% paragraph end, so the centering was reverted before \end{minipage}
%% issued its \par — the caption text was never actually centered.
%% Leaving \centering in effect until the minipage ends centers it.
\newcommand{\dummyfig}[1]{
  \centering
  \fbox{
    \begin{minipage}[c][0.33\textheight][c]{0.5\textwidth}
      \centering
      #1
    \end{minipage}
  }
}
| # Copyright 2021 DeepMind Technologies Limited. All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, |