Q Learning with OpenAI Gym - FrozenLake.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Q Learning with OpenAI Gym - FrozenLake.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/analyticsindiamagazine/c544f66085c0863428ea5c223adbe545/q-learning-with-openai-gym-frozenlake.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "54AIvDov_7aa",
"colab_type": "text"
},
"source": [
"## Step -1: Install the dependencies on Google Colab" | |
]
},
{
"cell_type": "code",
"metadata": {
"id": "gxxpHDIs_lvg",
"colab_type": "code",
"outputId": "20dae3b3-c6c2-4e6b-9144-2ab872a0e6bb",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
}
},
"source": [
"!pip install numpy\n",
"!pip install openai-gym" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (1.18.2)\n", | |
"\u001b[31mERROR: Could not find a version that satisfies the requirement openai-gym (from versions: none)\u001b[0m\n", | |
"\u001b[31mERROR: No matching distribution found for openai-gym\u001b[0m\n" | |
], | |
"name": "stdout" | |
} | |
] | |
},
{
"cell_type": "code",
"metadata": {
"id": "oU8zRXv8QHlm",
"colab_type": "code",
"colab": {}
},
"source": [
"#import the required libraries.\n", | |
"import numpy as np\n", | |
"import gym\n", | |
"import random" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mh9jBR_cQ5_a", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"#create the environment usign OpenAI Gym\n", | |
"env = gym.make(\"FrozenLake-v0\")" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
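{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check, we can render the 4x4 FrozenLake grid before training: `S` is the start tile, `F` is frozen (safe), `H` is a hole, and `G` is the goal."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Sanity check: reset the environment and print the 4x4 grid layout\n",
"env.reset()\n",
"env.render()"
],
"execution_count": 0,
"outputs": []
},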
{
"cell_type": "markdown",
"metadata": {
"id": "JEtXMldxQ7uw",
"colab_type": "text"
},
"source": [
"## Step 2: Create the Q-table and initialize it"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Uc0xDVd_Q-C8",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "3abe6456-6584-4ea6-d948-24f3e16b10a7"
},
"source": [
"action_size = env.action_space.n\n",
"state_size = env.observation_space.n\n",
"\n",
"print(f\"Action Space : {action_size} | State Space: {state_size}\")"
],
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"text": [
"Action Space : 4 | State Space: 16\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "0J_GfR-p25bq",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "a6e8f9d7-9363-41e3-eee4-e7668c4f115a"
},
"source": [
"qtable = np.zeros((state_size, action_size))\n",
"print(qtable.shape)"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"text": [
"(16, 4)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9DbAR9J_3DXa",
"colab_type": "text"
},
"source": [
"## Step 3: Create Required Hyperparameters" | |
]
},
{
"cell_type": "code",
"metadata": {
"id": "dBHB8MIl71Aw",
"colab_type": "code",
"colab": {}
},
"source": [
"total_episodes = 15000 # Total episodes\n",
"learning_rate = 0.8 # Learning rate\n",
"max_steps = 99 # Max steps per episode\n",
"gamma = 0.95 # Discounting rate\n",
"\n",
"# Exploration parameters\n",
"epsilon = 1.0 # Exploration rate\n",
"max_epsilon = 1.0 # Exploration probability at start\n",
"min_epsilon = 0.01 # Minimum exploration probability\n",
"decay_rate = 0.005 # Exponential decay rate for exploration prob"
],
"execution_count": 0,
"outputs": []
},
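{
"cell_type": "markdown",
"metadata": {},
"source": [
"To get a feel for the exploration schedule, the next cell previews the same exponential decay formula that the training loop applies after every episode. With `decay_rate = 0.005`, epsilon drops to about 0.09 by episode 500 and is essentially at `min_epsilon` after a few thousand episodes."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Preview the epsilon decay schedule used in the training loop below\n",
"for ep in [0, 100, 500, 1000, 5000, 15000]:\n",
"    eps = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * ep)\n",
"    print(f\"episode {ep:>5}: epsilon = {eps:.3f}\")"
],
"execution_count": 0,
"outputs": []
},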
{
"cell_type": "markdown",
"metadata": {
"id": "xqu-5j9B7qmy",
"colab_type": "text"
},
"source": [
"## Step 4: Q-Learning Algorithm"
]
},
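{
"cell_type": "markdown",
"metadata": {},
"source": [
"The update applied at every step below is the standard Q-learning rule, where $\\alpha$ is `learning_rate` and $\\gamma$ is `gamma`:\n",
"\n",
"$$Q(s, a) \\leftarrow Q(s, a) + \\alpha \\, \\big[ r + \\gamma \\max_{a'} Q(s', a') - Q(s, a) \\big]$$"
]
},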
{
"cell_type": "code",
"metadata": {
"id": "YJYnA88a3TmG",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 318
},
"outputId": "b7ac72cb-470a-4388-9b7a-dccbb47dd8d9"
},
"source": [
"# List of rewards\n",
"rewards = []\n",
"\n",
"#until learning is stopped\n", | |
"for episode in range(total_episodes):\n", | |
" # Reset the environment\n", | |
" state = env.reset()\n", | |
" step = 0\n", | |
" done = False\n", | |
" total_rewards = 0\n", | |
" \n", | |
" for step in range(max_steps):\n", | |
" #Choose an action a in the current world state (s)\n", | |
" exp_exp_tradeoff = random.uniform(0, 1)\n", | |
" \n", | |
" ## If this number > greater than epsilon --> exploitation (taking the biggest Q value for this state)\n", | |
" if exp_exp_tradeoff > epsilon:\n", | |
" action = np.argmax(qtable[state,:])\n", | |
"\n", | |
" # Else doing a random choice --> exploration\n", | |
" else:\n", | |
" action = env.action_space.sample()\n", | |
"\n", | |
" # Take the action (a) and observe the outcome state(s') and reward (r)\n", | |
" new_state, reward, done, info = env.step(action)\n", | |
"\n", | |
" # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n", | |
" # qtable[new_state,:] : all the actions we can take from new state\n", | |
" qtable[state, action] = qtable[state, action] + learning_rate * \\\n", | |
" (reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action])\n", | |
" \n", | |
" total_rewards += reward\n", | |
" \n", | |
" # Our new state is state\n", | |
" state = new_state\n", | |
" \n", | |
" # If done (if we're dead) : finish episode\n", | |
" if done == True: \n", | |
" break\n", | |
" \n", | |
" # Reduce epsilon (because we need less and less exploration)\n", | |
" epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode) \n", | |
"\n", | |
" rewards.append(total_rewards)\n", | |
"\n", | |
"print (\"Score over time: \" + str(sum(rewards)/total_episodes))\n", | |
"print(qtable)" | |
],
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"text": [
"Score over time: 0.4732\n",
"[[1.05246797e-01 4.01872123e-02 1.35132136e-02 1.48787076e-02]\n",
" [1.82176762e-03 2.69568226e-03 1.09909860e-03 1.61059078e-01]\n",
" [3.17791561e-03 4.06560846e-03 1.30660194e-03 3.01424520e-02]\n",
" [4.94699849e-06 1.93331852e-04 1.20861469e-03 1.93745500e-02]\n",
" [1.10367502e-01 2.91911411e-03 1.00353953e-02 2.63693712e-02]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [1.35836129e-02 1.82090030e-05 2.64523836e-05 3.82205929e-05]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [1.48845326e-02 7.52926653e-03 6.93762419e-03 1.83790127e-01]\n",
" [3.19882114e-02 1.54967666e-01 4.70875674e-02 4.27540059e-02]\n",
" [1.93771328e-02 8.31870743e-03 1.75091024e-02 9.78052945e-04]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [5.55887924e-03 1.98347857e-02 7.45547501e-01 1.61949091e-02]\n",
" [2.49407977e-01 9.81179468e-01 1.92530281e-01 3.14899072e-01]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]\n"
],
"name": "stdout"
}
]
},
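{
"cell_type": "markdown",
"metadata": {},
"source": [
"The overall score of about 0.47 averages over the whole run, including the early episodes when the agent was mostly exploring. Averaging the reward over consecutive 1000-episode blocks gives a rough view of learning progress instead."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Average reward per 1000-episode block to see how performance evolves\n",
"block = 1000\n",
"for i in range(0, total_episodes, block):\n",
"    avg = sum(rewards[i:i + block]) / block\n",
"    print(f\"episodes {i:>5}-{i + block - 1:>5}: average reward = {avg:.3f}\")"
],
"execution_count": 0,
"outputs": []
},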
{
"cell_type": "code",
"metadata": {
"id": "gQvoFSsr3TkM",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 727
},
"outputId": "39063780-bc9e-43ed-c292-82d162d9302b"
},
"source": [
"for episode in range(5):\n", | |
" state = env.reset()\n", | |
" step = 0\n", | |
" done = False\n", | |
" print(\"****************************************************\")\n", | |
" print(\"EPISODE \", episode)\n", | |
"\n", | |
" for step in range(max_steps):\n", | |
" \n", | |
" # Take the action (index) that have the maximum expected future reward given that state\n", | |
" action = np.argmax(qtable[state,:])\n", | |
" \n", | |
" new_state, reward, done, info = env.step(action)\n", | |
" \n", | |
" if done:\n", | |
" # Here, we decide to only print the last state (to see if our agent is on the goal or fall into an hole)\n", | |
" env.render()\n", | |
" \n", | |
" # We print the number of step it took.\n", | |
" print(\"Number of steps\", step)\n", | |
" break\n", | |
" state = new_state\n", | |
"env.close()" | |
],
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"text": [
"****************************************************\n",
"EPISODE 0\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 17\n",
"****************************************************\n",
"EPISODE 1\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 65\n",
"****************************************************\n",
"EPISODE 2\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 14\n",
"****************************************************\n",
"EPISODE 3\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 14\n",
"****************************************************\n",
"EPISODE 4\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 46\n"
],
"name": "stdout"
}
]
},
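{
"cell_type": "markdown",
"metadata": {},
"source": [
"Beyond eyeballing five rendered episodes, a rough success rate for the greedy policy can be estimated by replaying it many times and counting how often the agent reaches the goal. The sketch below recreates the environment (since it was closed above) and uses an arbitrary 100 evaluation episodes; on the slippery FrozenLake even a good greedy policy fails sometimes, so expect a rate well below 100%."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Estimate the greedy policy's success rate over many evaluation episodes\n",
"env = gym.make(\"FrozenLake-v0\")\n",
"n_eval = 100  # arbitrary number of evaluation episodes\n",
"successes = 0\n",
"for _ in range(n_eval):\n",
"    state = env.reset()\n",
"    for _ in range(max_steps):\n",
"        state, reward, done, info = env.step(np.argmax(qtable[state, :]))\n",
"        if done:\n",
"            successes += reward  # reward is 1.0 only when the goal is reached\n",
"            break\n",
"print(f\"Goal reached in {successes:.0f} of {n_eval} episodes\")"
],
"execution_count": 0,
"outputs": []
}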
]
} |