Install visdom on your local system and remote server.
pip3 install visdom
On the remote server, start the visdom server:
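A minimal sketch of this step, assuming the default visdom port of 8097 (the SSH hostname below is a placeholder, not a real server):
python3 -m visdom.server
Then, from your local machine, forward the port so the dashboard is reachable in your browser:
ssh -N -L 8097:localhost:8097 user@remote-host
and open http://localhost:8097 locally.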
%% This part goes in preamble
\newcommand{\dummyfig}[1]{
  \centering
  \fbox{
    \begin{minipage}[c][0.33\textheight][c]{0.5\textwidth}
      \centering{#1}
    \end{minipage}
  }
}
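For reference, a minimal usage sketch for the document body (the caption, label, and placeholder text are my own, not from the original):
\begin{figure}[ht]
  \dummyfig{Placeholder: final diagram goes here}
  \caption{Dummy figure shown until the real image is ready.}
  \label{fig:placeholder}
\end{figure}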
The following are higher-quality versions of the TLP and TinyTLP datasets. The resolution is unchanged, but the images are much sharper.
https://drive.google.com/open?id=1mv0ULctCzGn4gzum_Sb6kwXwB2QjZwu1
# simple neural network implementation of qlearning
import gym
from gym import wrappers
import numpy as np
import tensorflow as tf

# build environment
env = gym.make("FrozenLake-v0")
env = wrappers.Monitor(env, '/tmp/frozenlake-qlearning', force=True)
n_obv = env.observation_space.n
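The snippet ends after reading the size of the observation space. A minimal continuation sketch, assuming TensorFlow 1.x and the usual one-hot-state, single-linear-layer Q-network for FrozenLake (the variable names and learning rate are my assumptions, not the original author's):
n_act = env.action_space.n

# Q-network: one-hot encoded state -> single linear layer -> one Q-value per action
state_ph = tf.placeholder(tf.float32, shape=[1, n_obv])
weights = tf.Variable(tf.random_uniform([n_obv, n_act], 0, 0.01))
q_values = tf.matmul(state_ph, weights)
greedy_action = tf.argmax(q_values, axis=1)

# squared-error loss against the Bellman target, trained with plain SGD
target_ph = tf.placeholder(tf.float32, shape=[1, n_act])
loss = tf.reduce_sum(tf.square(target_ph - q_values))
train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss)
# the training loop (epsilon-greedy action selection, Bellman targets, sess.run(train_op, ...)) is omitted here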
# monte carlo policy gradient algorithm
# use neural network to decide the policy
# from observations and rewards, update the parameters of the neural networks to optimize the policy
import numpy as np
import tensorflow as tf
import gym
from gym import wrappers

# initialize constants
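The snippet stops at the constants. A minimal REINFORCE-style sketch of what could follow, assuming TensorFlow 1.x and CartPole-v0; the hyperparameters, layer sizes, and variable names are my assumptions, not the original author's:
learning_rate = 0.01
gamma = 0.99  # discount factor used when computing returns

env = gym.make("CartPole-v0")
n_obs = env.observation_space.shape[0]
n_act = env.action_space.n

# policy network: observation -> small hidden layer -> action probabilities
obs_ph = tf.placeholder(tf.float32, shape=[None, n_obs])
hidden = tf.layers.dense(obs_ph, 16, activation=tf.nn.relu)
logits = tf.layers.dense(hidden, n_act)
action_probs = tf.nn.softmax(logits)

# REINFORCE loss: negative log-probability of the taken action, weighted by the discounted return
action_ph = tf.placeholder(tf.int32, shape=[None])
return_ph = tf.placeholder(tf.float32, shape=[None])
neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=action_ph, logits=logits)
loss = tf.reduce_mean(neg_log_prob * return_ph)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)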
# random guessing algorithm
# generate 10000 random configurations of the model's parameters and pick the one that achieves the best cumulative reward.
# optimize it for weighted sum
import gym
from gym import wrappers
import numpy as np

env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-random-guessing', force=True)
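The snippet ends after wrapping the environment. A minimal sketch of the random-guessing loop the comments describe, assuming a linear policy that picks the action from the sign of the weighted sum of the observation; the helper name and episode/step limits are my own choices:
def run_episode(env, params):
    # play one episode with a fixed linear policy and return the cumulative reward
    observation = env.reset()
    total_reward = 0
    for _ in range(200):
        action = 0 if np.dot(params, observation) < 0 else 1
        observation, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward

# try 10000 random parameter vectors and keep the one with the best episode reward
best_params, best_reward = None, -np.inf
for _ in range(10000):
    params = np.random.uniform(-1, 1, size=4)  # CartPole observations have 4 components
    reward = run_episode(env, params)
    if reward > best_reward:
        best_params, best_reward = params, reward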
# hill climbing algorithm
# generate a random configuration of the parameters, add a small amount of noise to the parameters and evaluate the new configuration
# if the new configuration is better than the old one, discard the old one and accept the new one
# optimize it for weighted sum
# returns the net episode reward
def get_episode_reward(env, observation, params):
    t = 0
    net_reward = 0
    while t < 1000:
        # loop body below is an assumed completion: a linear (weighted-sum) policy with the classic gym step API
        action = 0 if np.dot(params, observation) < 0 else 1
        observation, reward, done, _ = env.step(action)
        net_reward += reward
        t += 1
        if done:
            break
    return net_reward
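A minimal sketch of the hill-climbing loop itself, reusing the gym and numpy imports from the snippets above; the environment, noise scale, and episode count are my assumptions:
env = gym.make('CartPole-v0')
noise_scale = 0.1
params = np.random.uniform(-1, 1, size=4)
best_reward = get_episode_reward(env, env.reset(), params)

for episode in range(2000):
    # perturb the current parameters with a small amount of noise and evaluate the result
    new_params = params + noise_scale * np.random.randn(4)
    reward = get_episode_reward(env, env.reset(), new_params)
    # accept the new configuration only if it improves on the old one
    if reward > best_reward:
        params, best_reward = new_params, reward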