Last active
February 19, 2017 00:53
-
-
Save pilipolio/f551c334c99fb921e16855909269063b to your computer and use it in GitHub Desktop.
FrozenLake with Q tabular learning
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Frozen Lake\n", | |
"\n", | |
" * https://gym.openai.com/envs/FrozenLake-v0\n", | |
" * Inspired by https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-0-q-learning-with-tables-and-neural-networks-d195264329d0#.91x58km60\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import gym\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 232, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[2017-02-19 00:08:00,024] Making new env: FrozenLake-v0\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\u001b[41mS\u001b[0mFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
"\n", | |
"0 0.0 False {'prob': 0.3333333333333333}\n", | |
"\u001b[41mS\u001b[0mFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
" (Left)\n", | |
"4 0.0 False {'prob': 0.3333333333333333}\n", | |
"SFFF\n", | |
"\u001b[41mF\u001b[0mHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
" (Down)\n", | |
"0 0.0 False {'prob': 0.3333333333333333}\n", | |
"\u001b[41mS\u001b[0mFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
" (Right)\n", | |
"0 0.0 False {'prob': 0.3333333333333333}\n", | |
"\u001b[41mS\u001b[0mFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
" (Up)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<ipykernel.iostream.OutStream at 0x10c5dc2e8>" | |
] | |
}, | |
"execution_count": 232, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# slipperly lake, so actions not deterministic!\n", | |
"env = gym.make('FrozenLake-v0')\n", | |
"\n", | |
"env.render()\n", | |
"\n", | |
"next_s, r, is_end, inf = env.step(action=0)\n", | |
"print(next_s, r, is_end, inf)\n", | |
"env.render()\n", | |
"\n", | |
"next_s, r, is_end, inf = env.step(action=1)\n", | |
"print(next_s, r, is_end, inf)\n", | |
"env.render()\n", | |
"\n", | |
"next_s, r, is_end, inf = env.step(action=2)\n", | |
"print(next_s, r, is_end, inf)\n", | |
"env.render()\n", | |
"\n", | |
"next_s, r, is_end, inf = env.step(action=3)\n", | |
"print(next_s, r, is_end, inf)\n", | |
"env.render()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Tabular Q learning\n", | |
"\n", | |
"See https://webdocs.cs.ualberta.ca/~sutton/book/ebook/node65.html\n", | |
"\n", | |
"![alt text](https://webdocs.cs.ualberta.ca/~sutton/book/ebook/numeqtmp31.png \"Q learning update rule\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 234, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 286, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import math\n", | |
"import random\n", | |
"\n", | |
"def one_step_update(Q, s, a, r, next_s, alpha, gamma):\n", | |
" Q[s, a] += alpha * (r + gamma * Q[next_s, :].max() - Q[s, a])\n", | |
" return Q\n", | |
"\n", | |
"def eps_greedy_action(Q, s, epsilon):\n", | |
" return random.choice(np.arange(Q.shape[1])) if (random.uniform(0, 1) <= epsilon) else Q[s, :].argmax()\n", | |
"\n", | |
"\n", | |
"class QLearning:\n", | |
" def __init__(self, Q, epsilon, alpha=0.1, gamma=.99):\n", | |
" self.Q = Q\n", | |
" self.epsilon = epsilon\n", | |
" self.alpha = alpha\n", | |
" self.gamma = gamma\n", | |
" self.n_episodes = 0\n", | |
" \n", | |
" def take_step(self, env, s):\n", | |
" # decayed and first\n", | |
" epsilon = (self.epsilon / (1 + self.n_episodes)) if (self.n_episodes <= 5000) else 0\n", | |
" a = eps_greedy_action(self.Q, s, epsilon)\n", | |
" next_s, r, is_end, _ = env.step(a)\n", | |
" self.Q = one_step_update(self.Q, s, a, r, next_s, self.alpha, self.gamma)\n", | |
" \n", | |
" if is_end:\n", | |
" self.n_episodes += 1\n", | |
"\n", | |
" return next_s, r, is_end\n", | |
" \n", | |
" @classmethod\n", | |
" def create(cls, epsilon=10, alpha=0.1, gamma=.99):\n", | |
" Q = np.zeros((env.observation_space.n, env.action_space.n))\n", | |
" return cls(Q, epsilon, alpha, gamma)\n", | |
"\n", | |
"def play_new_episode(env, act_func):\n", | |
" s = env.reset()\n", | |
" is_end = False\n", | |
" i = 0\n", | |
" while (not is_end) and i < 100:\n", | |
" i += 1\n", | |
" s, r, is_end = act_func(env, s)\n", | |
" yield r" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 253, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\u001b[41mS\u001b[0mFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
" (Up)\n", | |
"\n", | |
"S\u001b[41mF\u001b[0mFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
" (Down)\n", | |
"\n", | |
"SFFF\n", | |
"F\u001b[41mH\u001b[0mFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
" (Down)\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"def capture_new_episode(env, act_func):\n", | |
" rewards, debugs = zip(*[(r, env.render(mode='ansi')) for r in play_new_episode(env, act_func)])\n", | |
" return sum(rewards), '\\n'.join(d.getvalue() for d in debugs)\n", | |
"\n", | |
"total_reward, debug = capture_new_episode(env, QLearning.create().take_step)\n", | |
"\n", | |
"print(debug)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 313, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.613\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAO4AAADuCAYAAAA+7jsiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADsBJREFUeJzt3VlsXNdhxvH/mZUc7qRGMheJkSzJ2ix5kdx4CWIJjg0v\nteMEDlKkUFu0SFo0KPrQGkWLPrR9aVM0LwVaIEWXlwJpEyAoUCRpCiOFE8V2JTeWYymRyNAiTWpI\nabhzOEPOcvpAaSRajiiKw3vuIb/fk4YzNj5y5s97h8PFWGsREb9EXA8QkdVTuCIeUrgiHlK4Ih5S\nuCIeUrgiHlK4Ih5SuCIeUrgiHoqt5sbR5gYbT7eu1xZnbHHjfv4yFdcL1ocpu16wPorTE5Tmc2al\n260q3Hi6ld6vfOnuV4XUQiblesK6ic9uzE9KsbkVH9teuvRPX72j223Me1Vkg1O4Ih5SuCIeUrgi\nHlK4Ih5SuCIeUrgiHlK4Ih5SuCIeUrgiHlK4Ih5SuCIeUrgB0++x9ktY7y+FGyBbqZB/9z3XM2QV\nCqPDFKcnXM+4hcIN0MKlIebePO16hqzCXN855vrOuZ5xC4UboPx75ylc6KOyuOh6ityhpXDPu55x\nC4UboPx757HFEoUL/a6nyB0ozkyxMDbC/FA/5YWC6znLKNyAFLMTFDNjwFLAEn5z/Uv3ky2XmX//\nouM1yyncgOTP3Yg1/95PsZUN+sugNpDr4S79O1zPcxVuQG4+ypZnZlgcHnG4RlZSKS4yf+nGUXau\nP1yfbBVuACr5AlQsyT33AtD05BMs9A04XiW3Mz/YT/OBh4g2NBFv6yDVu5tC5gPXs6oUbgBMMsHW\nL3+RROc9ALQ+9wxNxz/heJXcTsOufXS+8HkiiSTRVCPdL5+krnO761lVzsOtFEvk+4Zdz1hXJhLB\nGHPL2yS8Pur+CdN9tqrfq1wrpek5cv/XR+7MBXJnf077i49Rv6fHxRQRLwUSrrWWxaErzJ25QO7t\nixT6huGm7wGdffOn5N658dpmtLWR7ld/JYhpIl4K5ohbsZRn56nM5SnPzS+LFqAyX8AuFquXTSwa\nyKygtb38Aq0vPYeJx11PqanC8BAj//B3bHn+JVoeedT1nJra+Vt/ACZ8fzUhkHBNNELq0E5Sh3aS\n/rVnWLycXTr6nrlI/mdDtD77CO0vPRHEFKdMLEb4HgI1YC22WIQQvVxSK5F4wvWEj+TkOW6iawvt\nL26h/cXHKc/lKQxcdjFDxFvOv0wWbayn4fC9rmfIXfrwz6taa0P7M6wbiZMjrmwc49/9z+q/Cx8M\nkR98n22f+4LDRZuD8yOu+C2+Jc3UD/4HgNkfnyFSV3/La9ZSewpX1qThvgPLL+878AtuKbWkcGVN\nYs3NJHuWvhXQxBPU79rteNHmoHBlzRr2HQQgtXsPkQ32GnVYKVxZs+unxw37DzpesnkoXFmzRGcX\nsZZWUvftdz1l01C4smbGGNqfeoZYU7PrKZuGwpWaaHrwqOsJm4rClZoI08+qbgb6aIt4SOGKeEjh\ninhI4Yp4SOGKeEjhinhI4Yp4SOGKeEjhinhI4Yp4SOGKeEjhinhoVb/l0RhLIl5ary3O7Pi9t1xP\nWDfZL26svyxw3Zav/cj1hHUxYnN3dDsdcUU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRw\nRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBF\nPKRwRTwUinCtta4nrJuN+r5t5PfLh/dtVX/JoJbK+UVy7wwwc/oikXiMrt95ztWUmrLWMsMkWTJM\ncIUjPEaCpOtZNVHMTTM9eJ6ZwfO03nuE9r1HXU+qibItM8kVrpKhRJFDPOJ60ooCDXfx6jSzp/uY\nPX2R3E8uYYtlANqfPcr4t89Ubxepi9N24kiQ09akbEuMc4UsGbJkWKQAQDNtjDEMN30C30oXSVPv\naOnqWGvJZ4erseazw0tXRCI0du8he+5U9bbJtm00de12tHT1FmyeLKNc5TITXKHC0mOxk16GGaje\nZwZDj9nlcOlHCyRcW64w8d23ufqNH1KanLvl+onvnFl2OZ5u8SbcaTvORd5lmvFbrpthkhkml72t\nkWaShD/cUmGe0dPfYfziaWypuPzKSoXLb/zHsjd17Pu4F+Faa8kwyADnKTB/y/UZBskwWL0cIUoP\nmzRcE43Q8fwx2p89Sr7/MrOnLzJ7uo/C+2MAbP3V4zQe3nnj9vFQPPW+Iy2mg2Mcp2Dz14641z+D\nV2ilgz0cXnb7BpodLV2dWF2Knk98lq5HX2R2pI+ZwfPMDJ2nmJuGSIRdz/wm0eSNT0Cx+kaHa++c\nMYYuPkan7WWOaa5eu8+uf4LtZS9b6b75v3AzdAWBniqbiCG1t5vU3m62feH40qnzmT7K0/Ok7ute\n+X8QYnWmnh520cMuyrbEBFfIMko9DSRMnet5dy0Si9PSe4CW3gPXTp1HmBk8R6W0SPOOfa7n3TVj\nDE200kQru9jPgi2QJcM8szTTjjHhDPY6Z1+cAkikW+h4dmN8geNmURMjTRdpulxPqSljDKl0D6l0\nj+spNZc0dXSzc+UbhoQ/56QiUqVwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRw\nRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTy0qt+rHItW\n2NKYW68tzrSdanc9Yd00lPtdT1gXua+5XuCWjrgiHlK4Ih5SuCIeUrgiHlK4Ih5SuCIeUrgiHlK4\nIh5SuCIeUrgiHlK4Ih5SuCIeUrgiHlK4Ih4KZbjlxZLrCSKhFspwr37/AvnRadczREIrlOFm3xhg\n/I0B1zNEQit04VaKZSbPXFK4IrcRunCnzg5TzheZOvsBpdyC6zkioRS6cMff/DkAtlRh8u1Bx2tE\nwilU4Vprl50iZ3W6LPKRQhXu/OA4hdGZ6uWJt97HlisOF4mEU6jCLYzNcOgvXiLakKTt4V56T36c\nfEYvC4l82Kp+r/J66/ilXQAYA5FElJ5PP+h4kUg4heqIK+FXnFtg5LV+xt4acj2lpqy1TNksQ7YP\na63rOSsK1RFXwik3Ms3oqUuMnhpk/GwGgCf/+RWKucXqbSKxCNGkXw+nki0yzhhZMmTJUGSRXRyg\nTAluajdm4u5G/gKh/Eg37+8k1dvhekbNzQ1Pk8/O03FoG5FYuE92FmcK9H/9LJnX32ducPKW679/\n8t+WXe795f088OqTAa27e9ZaRhkiwyCTXMWy/Og6wHkGOF+9HCHKCV4OeuaKQhnu4b/8jOsJ66Lv\n39+j/5vn+PR//zqRWML1nNtKNNex+/NHaPpYG2OnBhn73yFKc0tHWBON0Pvifowx1du333+Pq6mr\nYoxhm91OHSkaaSFLhnnmqtd3sI16GquXIyF9NhnKcCUcEs11bH96L9uf3kulVGb83VHGTl1i9EeD\npB/uoeuTu1xPvCsRE6GNNG2k2csRcna2erocJc4+E/4viipcuSORWJT0Q92kH+rm4Jcfqx59N4IG\n00QDTfSyl6JdxFq77GwijMJ5HrDBlPJFSvPFZW8rTOQdrVk7YwzxpqTrGesibhKhjxYUbiAisQjf\nO/lNxs6MAPDDP/wvhr7X53iV+EzhBiASj9J23xZmL00BkH0nQ+fjvY5Xic8UbkA6n7gRatOOFpq2\ntzhcI75TuAHpfHQHRJaeO90cscjdULgBSbbW0XFoKwBdOk2WNVK4Aep6rJd4U5KO+7e5niKe0+u4\nAep8YgfTAxOh/3ZHCT89ggLUvLON3a8ccj1DNgCFGyBjDB0Ht7qeIRuAwhXxkMIV8ZDCFfGQwhXx\nkMIV8ZDCFfGQwhXxkMIV8ZDCFfGQwhXxkMIV8ZDCFfGQwhXx0Kp+HtdcXCT21Mb6Y08Ar1zcuH9A\n+7ONMyvfyEPP8IDrCU7piCviIYUr4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr\n4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr4iGFK+Ih5+Fa\na8nZWdcznMlcKlApW9czhOuPxRmsDf/9saq/ZFArZVtmkitcJUOWDPewnT0cdjHFubM/mOEbfzvK\ng082c/R4C4efaCbVFHU9a9Oo2DKTZMmS4SqXaSPNQXPM9awVBRbugs1f++BkmOAKFcrV66YZ5x17\nqno5QZID5mhQ0wL1r389wnB/oXq5uFBhdrLE69+a4PVvTRCNGw4ca+ThEy0cPdHC1u1Jh2s3pkW7\nQPbaQWOcMcqUqtflmFn2WIwQ4bB51MXM2wok3IqtMMU4U4wzzfiyaAFmmcZgqpeT1Acxy4nBn+Xp\neydXvfzhs7Jy0dJ/NkdDS5SG5iitW+Mkks6f0dzWj3+ywGd+I8OfvdrOyc81u55zW9Zapq89FqcY\nXxYtQI5Z5pmrXo4QzrOfQMKNmAjb6GEbPdUP3PXT5Bwz7OYQ283uIKY498f/uPz9PPPaFF/57QG2\n9iR4+EQLD59o4cCxRmKJcMd6s8WiZWikRG4+/M8NjTGk6SJNF9ZaZpisHn1nmWIHe7jXHHQ9c0WB\nP8c1xtDKFlrZwh7uZ97OMctU0DNCIxI1fPXb++neXYcxZuX/QGrGGEML7bTQzr0cpGDnmWIca23o\n7wsnX5y6Wco0kqLR9QxnHnqyxfUEuabOpLiHlOsZd8Sf8zEJpT//mwlee30egHMXFvndP7rixcsp\nvlO4sibpjih/+lcTAPz9v0yzWCT0p5kbgcKVNXn+qdRtL8v6ULiyJjt64hw5mAAgmTR86pMKNwgK\nV9bshU81AHD88XoaUnpIBUEfZVmz56+F+8LTDY6XbB4KV9bs2ANJtqWjvKDnt4FRuLJmkYjhT36/\nje3dcddTNg2FKzXxpZP6RpIgKVypiVhMr90GSeGKeEjhinhI4Yp4SOGKeEjhinhI4Yp4SOGKeEjh\ninhI4Yp4SOGKeEjhinhI4Yp4SOGKeMis5ldpGmOuAoPrN0dk0+u11qZXutGqwhWRcNCpsoiHFK6I\nhxSuiIcUroiHFK6IhxSuiIcUroiHFK6IhxSuiIf+HziD+LEmUlKyAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10fbff390>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"n_episodes = 10000\n", | |
"qlearn = QLearning.create(epsilon=100)\n", | |
"\n", | |
"simulated_episodes = (play_new_episode(env, qlearn.take_step) for _ in range(n_episodes))\n", | |
"average_reward = sum(sum(rs) for rs in simulated_episodes) / n_episodes\n", | |
"print(average_reward)\n", | |
"\n", | |
"from matplotlib import pyplot as plt\n", | |
"%matplotlib inline\n", | |
"\n", | |
"def showQ(Q):\n", | |
" actions_grid = Q.argmax(1).reshape((4, 4))\n", | |
" values_grid = Q.max(1).reshape((4, 4))\n", | |
"\n", | |
" actions_xys = np.array([\n", | |
" [-1, 0], #L\n", | |
" [0, -1], #D\n", | |
" [1, 0], #R\n", | |
" [0, 1], #U\n", | |
" ])\n", | |
"\n", | |
" actions_x_grid = actions_xys[actions_grid, 0]\n", | |
" actions_y_grid = actions_xys[actions_grid, 1]\n", | |
"\n", | |
" plt.imshow(values_grid, interpolation='nearest')\n", | |
"\n", | |
" plt.quiver(actions_x_grid, actions_y_grid)\n", | |
"\n", | |
" plt.xticks([]); plt.yticks([]);\n", | |
"\n", | |
"showQ(qlearn.Q)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 310, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.72\n" | |
] | |
} | |
], | |
"source": [ | |
"n_episodes = 100\n", | |
"\n", | |
"simulated_episodes = (capture_new_episode(env, QLearning(qlearn.Q, epsilon=0).take_step) for _ in range(n_episodes))\n", | |
"\n", | |
"average_reward = sum(total_reward for total_reward, _ in simulated_episodes) / n_episodes\n", | |
"print(average_reward)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 321, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"SFFF\n", | |
"\u001b[41mF\u001b[0mHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
" (Left)\n", | |
"\n", | |
"SFFF\n", | |
"FHFH\n", | |
"\u001b[41mF\u001b[0mFFH\n", | |
"HFFG\n", | |
" (Left)\n", | |
"\n", | |
"SFFF\n", | |
"FHFH\n", | |
"F\u001b[41mF\u001b[0mFH\n", | |
"HFFG\n", | |
" (Up)\n", | |
"\n", | |
"SFFF\n", | |
"FHFH\n", | |
"FF\u001b[41mF\u001b[0mH\n", | |
"HFFG\n", | |
" (Down)\n", | |
"\n", | |
"SFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HF\u001b[41mF\u001b[0mG\n", | |
" (Left)\n", | |
"\n", | |
"SFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFF\u001b[41mG\u001b[0m\n", | |
" (Down)\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"simulated_episodes = (capture_new_episode(env, QLearning(qlearn.Q, epsilon=0.).take_step) for _ in range(n_episodes))\n", | |
"first_winning_debug = next(debug for total_reward, debug in simulated_episodes \n", | |
" if (total_reward > 0 and len(debug) < 300))\n", | |
"print(first_winning_debug)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 311, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[2017-02-19 00:46:50,628] Making new env: FrozenLake-v0\n", | |
"[2017-02-19 00:46:50,636] DEPRECATION WARNING: env.spec.timestep_limit has been deprecated. Replace your call to `env.spec.timestep_limit` with `env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')`. This change was made 12/28/2016 and is included in version 0.7.0\n", | |
"[2017-02-19 00:46:50,638] Clearing 26 monitor files from previous run (because force=True was provided)\n", | |
"[2017-02-19 00:46:50,641] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000000.json\n", | |
"[2017-02-19 00:46:50,644] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000001.json\n", | |
"[2017-02-19 00:46:50,648] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000008.json\n", | |
"[2017-02-19 00:46:50,653] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000027.json\n", | |
"[2017-02-19 00:46:50,666] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000064.json\n", | |
"[2017-02-19 00:46:50,680] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000125.json\n", | |
"[2017-02-19 00:46:50,701] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000216.json\n", | |
"[2017-02-19 00:46:50,732] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000343.json\n", | |
"[2017-02-19 00:46:50,783] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000512.json\n", | |
"[2017-02-19 00:46:50,884] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000729.json\n", | |
"[2017-02-19 00:46:51,043] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video001000.json\n", | |
"[2017-02-19 00:46:51,793] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video002000.json\n", | |
"[2017-02-19 00:46:52,640] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video003000.json\n", | |
"[2017-02-19 00:46:53,444] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video004000.json\n", | |
"[2017-02-19 00:46:54,319] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video005000.json\n", | |
"[2017-02-19 00:46:55,253] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video006000.json\n", | |
"[2017-02-19 00:46:56,296] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video007000.json\n", | |
"[2017-02-19 00:46:57,306] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video008000.json\n", | |
"[2017-02-19 00:46:58,304] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video009000.json\n", | |
"[2017-02-19 00:46:59,449] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/FrozenLake-v0')\n" | |
] | |
} | |
], | |
"source": [ | |
"qlearn = QLearning.create(epsilon=100)\n", | |
"\n", | |
"import gym\n", | |
"from gym import wrappers\n", | |
"\n", | |
"env = wrappers.Monitor(gym.make('FrozenLake-v0'), '/tmp/FrozenLake-v0', force=True)\n", | |
"\n", | |
"for i_episode in range(10000):\n", | |
" r = sum(play_new_episode(env, qlearn.take_step))\n", | |
"\n", | |
"env.close()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 312, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[2017-02-19 00:47:00,333] [FrozenLake-v0] Uploading 10000 episodes of training data\n", | |
"[2017-02-19 00:47:12,078] [FrozenLake-v0] Uploading videos of 19 training episodes (2124 bytes)\n", | |
"[2017-02-19 00:47:12,595] [FrozenLake-v0] Creating evaluation object from /tmp/FrozenLake-v0 with learning curve and training video\n", | |
"[2017-02-19 00:47:12,926] \n", | |
"****************************************************\n", | |
"You successfully uploaded your evaluation on FrozenLake-v0 to\n", | |
"OpenAI Gym! You can find it at:\n", | |
"\n", | |
" https://gym.openai.com/evaluations/eval_sYk0FCES1iR4JPQJxfpmA\n", | |
"\n", | |
"****************************************************\n" | |
] | |
} | |
], | |
"source": [ | |
"gym.upload('/tmp/FrozenLake-v0', api_key='sk_bNZUvCfkTfabQCoKoKbjFA')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment