@kobus-v-schoor
Created September 14, 2020 20:04
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Q-Table implementation to solve the FrozenLake RL problem\n",
"# Adapted from https://gist.github.com/awjuliani/4d69edad4d0ed9a5884f3cdcf0ea0874"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"env = gym.make('FrozenLake-v0', is_slippery=False)\n",
"np.set_printoptions(suppress=True)"
]
},
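{
"cell_type": "markdown",
"metadata": {},
"source": [
"FrozenLake-v0 is a 4x4 grid world with 16 discrete states and 4 discrete actions, so the Q-table below has shape 16x4; `is_slippery=False` makes the transitions deterministic. A quick sanity check of the space sizes (an added illustration, not part of the original gist):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# inspect the sizes of the state and action spaces\n",
"print(env.observation_space.n)  # 16 states (4x4 grid)\n",
"print(env.action_space.n)  # 4 actions: 0=left, 1=down, 2=right, 3=up"
]
},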
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0. 0.77378094 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0.81450625 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 0.857375 0. ]\n",
" [0. 0. 0.9025 0. ]\n",
" [0. 0.95 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 1. 0. ]\n",
" [0. 0. 0. 0. ]]\n"
]
}
],
"source": [
"# q-table\n",
"q = np.zeros([env.observation_space.n, env.action_space.n])\n",
"\n",
"# discounting rate gamma\n",
"y = 0.95\n",
"\n",
"# run for x amount of episodes\n",
"for eps in range(2000):\n",
" # the current state (which is zero at the first timestep)\n",
" state = env.reset()\n",
"\n",
" # run for a maximum of 100 steps\n",
" for _ in range(100):\n",
" # choose an action by selecting the action with the highest Q-value for the current state\n",
" # also adds noise to aid in exploration\n",
" noise = np.random.randn(1, env.action_space.n) * (1 / (eps + 1))\n",
" action = np.argmax(q[state, :] + noise)\n",
" \n",
" # get the new state and reward from the environment\n",
" nstate, reward, done, info = env.step(action)\n",
"\n",
" # update the the q-table\n",
" # new q-value for action = reward + discount * maximum reward in next state\n",
" q[state, action] = reward + y * np.max(q[nstate, :])\n",
" \n",
" # set new state\n",
" state = nstate\n",
" \n",
" # stop if episode has ended\n",
" if done:\n",
" break\n",
" \n",
"print(q)"
]
},
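{
"cell_type": "markdown",
"metadata": {},
"source": [
"The update in the loop above is the Q-learning update specialised to a deterministic environment (no learning rate): $Q(s, a) \\leftarrow r + \\gamma \\max_{a'} Q(s', a')$. Since the only non-zero reward is 1 at the goal, the table converges to powers of the discount factor along the optimal path, which matches the printout: $0.95^5 \\approx 0.7738$ at the start state down to $0.95^0 = 1$ for the final move into the goal.\n",
"\n",
"The gist explores by adding Gaussian noise (decaying with the episode number) to the Q-values before the argmax. A common alternative is epsilon-greedy selection; a minimal sketch of that variant (an added illustration, not the author's code):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hypothetical epsilon-greedy alternative to the noisy-argmax exploration above\n",
"def epsilon_greedy(q, state, eps):\n",
"    # with probability eps take a random action (explore) ...\n",
"    if np.random.rand() < eps:\n",
"        return env.action_space.sample()\n",
"    # ... otherwise take the best-known action (exploit)\n",
"    return np.argmax(q[state, :])\n",
"\n",
"# eps would typically be decayed over the episodes, e.g. eps = 1 / (episode + 1)"
]
},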
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[41mS\u001b[0mFFF\n",
"FHFH\n",
"FFFH\n",
"HFFG\n",
"\n",
" (Down)\n",
"SFFF\n",
"\u001b[41mF\u001b[0mHFH\n",
"FFFH\n",
"HFFG\n",
"\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"\u001b[41mF\u001b[0mFFH\n",
"HFFG\n",
"\n",
" (Right)\n",
"SFFF\n",
"FHFH\n",
"F\u001b[41mF\u001b[0mFH\n",
"HFFG\n",
"\n",
" (Right)\n",
"SFFF\n",
"FHFH\n",
"FF\u001b[41mF\u001b[0mH\n",
"HFFG\n",
"\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HF\u001b[41mF\u001b[0mG\n",
"\n",
" (Right)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n"
]
}
],
"source": [
"# display solution\n",
"done = False\n",
"state = env.reset()\n",
"env.render()\n",
"\n",
"while not done:\n",
" action = np.argmax(q[state, :])\n",
" state, _, done, _ = env.step(action)\n",
" print()\n",
" env.render()"
]
},
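{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a final check, the greedy policy can be evaluated over many episodes to estimate its success rate (1.0 is expected here, since the environment is deterministic and the table has converged). This cell is an added sketch, not part of the original gist:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# evaluate the greedy policy: fraction of episodes that reach the goal\n",
"# (FrozenLake only gives a reward of 1 when the goal is reached)\n",
"episodes = 100\n",
"successes = 0\n",
"for _ in range(episodes):\n",
"    state = env.reset()\n",
"    reward = 0\n",
"    # cap the episode length in case the greedy policy loops\n",
"    for _ in range(100):\n",
"        state, reward, done, _ = env.step(np.argmax(q[state, :]))\n",
"        if done:\n",
"            break\n",
"    successes += reward\n",
"print('success rate:', successes / episodes)"
]
}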
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}