Created
September 14, 2020 20:04
-
-
Save kobus-v-schoor/be385ad09261f2f93108f03ec510a379 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Q-Table implementation to solve the FrozenLake RL problem\n", | |
"# Adapted from https://gist.github.com/awjuliani/4d69edad4d0ed9a5884f3cdcf0ea0874" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import gym\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"env = gym.make('FrozenLake-v0', is_slippery=False)\n", | |
"np.set_printoptions(suppress=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[0. 0.77378094 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0.81450625 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0. 0.857375 0. ]\n", | |
" [0. 0. 0.9025 0. ]\n", | |
" [0. 0.95 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. ]\n", | |
" [0. 0. 1. 0. ]\n", | |
" [0. 0. 0. 0. ]]\n" | |
] | |
} | |
], | |
"source": [ | |
"# q-table\n", | |
"q = np.zeros([env.observation_space.n, env.action_space.n])\n", | |
"\n", | |
"# discounting rate gamma\n", | |
"y = 0.95\n", | |
"\n", | |
"# run for x amount of episodes\n", | |
"for eps in range(2000):\n", | |
" # the current state (which is zero at the first timestep)\n", | |
" state = env.reset()\n", | |
"\n", | |
" # run for a maximum of 100 steps\n", | |
" for _ in range(100):\n", | |
" # choose an action by selecting the action with the highest Q-value for the current state\n", | |
" # also adds noise to aid in exploration\n", | |
" noise = np.random.randn(1, env.action_space.n) * (1 / (eps + 1))\n", | |
" action = np.argmax(q[state, :] + noise)\n", | |
" \n", | |
" # get the new state and reward from the environment\n", | |
" nstate, reward, done, info = env.step(action)\n", | |
"\n", | |
" # update the the q-table\n", | |
" # new q-value for action = reward + discount * maximum reward in next state\n", | |
" q[state, action] = reward + y * np.max(q[nstate, :])\n", | |
" \n", | |
" # set new state\n", | |
" state = nstate\n", | |
" \n", | |
" # stop if episode has ended\n", | |
" if done:\n", | |
" break\n", | |
" \n", | |
"print(q)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"\u001b[41mS\u001b[0mFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
"\n", | |
" (Down)\n", | |
"SFFF\n", | |
"\u001b[41mF\u001b[0mHFH\n", | |
"FFFH\n", | |
"HFFG\n", | |
"\n", | |
" (Down)\n", | |
"SFFF\n", | |
"FHFH\n", | |
"\u001b[41mF\u001b[0mFFH\n", | |
"HFFG\n", | |
"\n", | |
" (Right)\n", | |
"SFFF\n", | |
"FHFH\n", | |
"F\u001b[41mF\u001b[0mFH\n", | |
"HFFG\n", | |
"\n", | |
" (Right)\n", | |
"SFFF\n", | |
"FHFH\n", | |
"FF\u001b[41mF\u001b[0mH\n", | |
"HFFG\n", | |
"\n", | |
" (Down)\n", | |
"SFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HF\u001b[41mF\u001b[0mG\n", | |
"\n", | |
" (Right)\n", | |
"SFFF\n", | |
"FHFH\n", | |
"FFFH\n", | |
"HFF\u001b[41mG\u001b[0m\n" | |
] | |
} | |
], | |
"source": [ | |
"# display solution\n", | |
"done = False\n", | |
"state = env.reset()\n", | |
"env.render()\n", | |
"\n", | |
"while not done:\n", | |
" action = np.argmax(q[state, :])\n", | |
" state, _, done, _ = env.step(action)\n", | |
" print()\n", | |
" env.render()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment