Q Learning with OpenAI Gym - FrozenLake.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Q Learning with OpenAI Gym - FrozenLake.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/analyticsindiamagazine/c544f66085c0863428ea5c223adbe545/q-learning-with-openai-gym-frozenlake.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "54AIvDov_7aa",
"colab_type": "text"
},
"source": [
"## Step -1: Install the dependencies on Google Colab" | |
]
},
{
"cell_type": "code",
"metadata": {
"id": "gxxpHDIs_lvg",
"colab_type": "code",
"outputId": "20dae3b3-c6c2-4e6b-9144-2ab872a0e6bb",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
}
},
"source": [
"!pip install numpy\n",
"!pip install openai-gym" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (1.18.2)\n", | |
"\u001b[31mERROR: Could not find a version that satisfies the requirement openai-gym (from versions: none)\u001b[0m\n", | |
"\u001b[31mERROR: No matching distribution found for openai-gym\u001b[0m\n" | |
], | |
"name": "stdout" | |
} | |
] | |
},
{
"cell_type": "code",
"metadata": {
"id": "oU8zRXv8QHlm",
"colab_type": "code",
"colab": {}
},
"source": [
"#import the required libraries.\n", | |
"import numpy as np\n", | |
"import gym\n", | |
"import random" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mh9jBR_cQ5_a", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"#create the environment usign OpenAI Gym\n", | |
"env = gym.make(\"FrozenLake-v0\")" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
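{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check, we can render the 4x4 FrozenLake grid before training: `S` is the start tile, `F` is frozen (safe), `H` is a hole, and `G` is the goal."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Sanity check: reset the environment and print the 4x4 grid layout\n",
"env.reset()\n",
"env.render()"
],
"execution_count": 0,
"outputs": []
},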
{
"cell_type": "markdown",
"metadata": {
"id": "JEtXMldxQ7uw",
"colab_type": "text"
},
"source": [
"## Step 2: Create the Q-table and initialize it"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Uc0xDVd_Q-C8",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "3abe6456-6584-4ea6-d948-24f3e16b10a7"
},
"source": [
"action_size = env.action_space.n\n",
"state_size = env.observation_space.n\n",
"\n",
"print(f\"Action Space : {action_size} | State Space: {state_size}\")"
],
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"text": [
"Action Space : 4 | State Space: 16\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "0J_GfR-p25bq",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "a6e8f9d7-9363-41e3-eee4-e7668c4f115a"
},
"source": [
"qtable = np.zeros((state_size, action_size))\n",
"print(qtable.shape)"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"text": [
"(16, 4)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9DbAR9J_3DXa",
"colab_type": "text"
},
"source": [
"## Step 3: Create Required Hyperparameters" | |
]
},
{
"cell_type": "code",
"metadata": {
"id": "dBHB8MIl71Aw",
"colab_type": "code",
"colab": {}
},
"source": [
"total_episodes = 15000 # Total episodes\n",
"learning_rate = 0.8 # Learning rate\n",
"max_steps = 99 # Max steps per episode\n",
"gamma = 0.95 # Discounting rate\n",
"\n",
"# Exploration parameters\n",
"epsilon = 1.0 # Exploration rate\n",
"max_epsilon = 1.0 # Exploration probability at start\n",
"min_epsilon = 0.01 # Minimum exploration probability\n",
"decay_rate = 0.005 # Exponential decay rate for exploration prob"
],
"execution_count": 0,
"outputs": []
},
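{
"cell_type": "markdown",
"metadata": {},
"source": [
"To get a feel for the exploration schedule, the next cell previews the same exponential decay formula that the training loop applies after every episode. With `decay_rate = 0.005`, epsilon drops to about 0.09 by episode 500 and is essentially at `min_epsilon` after a few thousand episodes."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Preview the epsilon decay schedule used in the training loop below\n",
"for ep in [0, 100, 500, 1000, 5000, 15000]:\n",
"    eps = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * ep)\n",
"    print(f\"episode {ep:>5}: epsilon = {eps:.3f}\")"
],
"execution_count": 0,
"outputs": []
},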
{
"cell_type": "markdown",
"metadata": {
"id": "xqu-5j9B7qmy",
"colab_type": "text"
},
"source": [
"## Step 4: Q-Learning Algorithm"
]
},
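{
"cell_type": "markdown",
"metadata": {},
"source": [
"The update applied at every step below is the standard Q-learning rule, where $\\alpha$ is `learning_rate` and $\\gamma$ is `gamma`:\n",
"\n",
"$$Q(s, a) \\leftarrow Q(s, a) + \\alpha \\, \\big[ r + \\gamma \\max_{a'} Q(s', a') - Q(s, a) \\big]$$"
]
},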
{
"cell_type": "code",
"metadata": {
"id": "YJYnA88a3TmG",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 318
},
"outputId": "b7ac72cb-470a-4388-9b7a-dccbb47dd8d9"
},
"source": [
"# List of rewards\n",
"rewards = []\n",
"\n",
"#until learning is stopped\n", | |
"for episode in range(total_episodes):\n", | |
" # Reset the environment\n", | |
" state = env.reset()\n", | |
" step = 0\n", | |
" done = False\n", | |
" total_rewards = 0\n", | |
" \n", | |
" for step in range(max_steps):\n", | |
" #Choose an action a in the current world state (s)\n", | |
" exp_exp_tradeoff = random.uniform(0, 1)\n", | |
" \n", | |
" ## If this number > greater than epsilon --> exploitation (taking the biggest Q value for this state)\n", | |
" if exp_exp_tradeoff > epsilon:\n", | |
" action = np.argmax(qtable[state,:])\n", | |
"\n", | |
" # Else doing a random choice --> exploration\n", | |
" else:\n", | |
" action = env.action_space.sample()\n", | |
"\n", | |
" # Take the action (a) and observe the outcome state(s') and reward (r)\n", | |
" new_state, reward, done, info = env.step(action)\n", | |
"\n", | |
" # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n", | |
" # qtable[new_state,:] : all the actions we can take from new state\n", | |
" qtable[state, action] = qtable[state, action] + learning_rate * \\\n", | |
" (reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action])\n", | |
" \n", | |
" total_rewards += reward\n", | |
" \n", | |
" # Our new state is state\n", | |
" state = new_state\n", | |
" \n", | |
" # If done (if we're dead) : finish episode\n", | |
" if done == True: \n", | |
" break\n", | |
" \n", | |
" # Reduce epsilon (because we need less and less exploration)\n", | |
" epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode) \n", | |
"\n", | |
" rewards.append(total_rewards)\n", | |
"\n", | |
"print (\"Score over time: \" + str(sum(rewards)/total_episodes))\n", | |
"print(qtable)" | |
],
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"text": [
"Score over time: 0.4732\n",
"[[1.05246797e-01 4.01872123e-02 1.35132136e-02 1.48787076e-02]\n",
" [1.82176762e-03 2.69568226e-03 1.09909860e-03 1.61059078e-01]\n",
" [3.17791561e-03 4.06560846e-03 1.30660194e-03 3.01424520e-02]\n",
" [4.94699849e-06 1.93331852e-04 1.20861469e-03 1.93745500e-02]\n",
" [1.10367502e-01 2.91911411e-03 1.00353953e-02 2.63693712e-02]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [1.35836129e-02 1.82090030e-05 2.64523836e-05 3.82205929e-05]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [1.48845326e-02 7.52926653e-03 6.93762419e-03 1.83790127e-01]\n",
" [3.19882114e-02 1.54967666e-01 4.70875674e-02 4.27540059e-02]\n",
" [1.93771328e-02 8.31870743e-03 1.75091024e-02 9.78052945e-04]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [5.55887924e-03 1.98347857e-02 7.45547501e-01 1.61949091e-02]\n",
" [2.49407977e-01 9.81179468e-01 1.92530281e-01 3.14899072e-01]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]\n"
],
"name": "stdout"
}
]
},
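{
"cell_type": "markdown",
"metadata": {},
"source": [
"The overall score of about 0.47 averages over the whole run, including the early episodes when the agent was mostly exploring. Averaging the reward over consecutive 1000-episode blocks gives a rough view of learning progress instead."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Average reward per 1000-episode block to see how performance evolves\n",
"block = 1000\n",
"for i in range(0, total_episodes, block):\n",
"    avg = sum(rewards[i:i + block]) / block\n",
"    print(f\"episodes {i:>5}-{i + block - 1:>5}: average reward = {avg:.3f}\")"
],
"execution_count": 0,
"outputs": []
},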
{
"cell_type": "code",
"metadata": {
"id": "gQvoFSsr3TkM",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 727
},
"outputId": "39063780-bc9e-43ed-c292-82d162d9302b"
},
"source": [
"for episode in range(5):\n", | |
" state = env.reset()\n", | |
" step = 0\n", | |
" done = False\n", | |
" print(\"****************************************************\")\n", | |
" print(\"EPISODE \", episode)\n", | |
"\n", | |
" for step in range(max_steps):\n", | |
" \n", | |
" # Take the action (index) that have the maximum expected future reward given that state\n", | |
" action = np.argmax(qtable[state,:])\n", | |
" \n", | |
" new_state, reward, done, info = env.step(action)\n", | |
" \n", | |
" if done:\n", | |
" # Here, we decide to only print the last state (to see if our agent is on the goal or fall into an hole)\n", | |
" env.render()\n", | |
" \n", | |
" # We print the number of step it took.\n", | |
" print(\"Number of steps\", step)\n", | |
" break\n", | |
" state = new_state\n", | |
"env.close()" | |
],
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"text": [
"****************************************************\n",
"EPISODE 0\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 17\n",
"****************************************************\n",
"EPISODE 1\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 65\n",
"****************************************************\n",
"EPISODE 2\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 14\n",
"****************************************************\n",
"EPISODE 3\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 14\n",
"****************************************************\n",
"EPISODE 4\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 46\n"
],
"name": "stdout"
}
]
},
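{
"cell_type": "markdown",
"metadata": {},
"source": [
"Beyond eyeballing five rendered episodes, a rough success rate for the greedy policy can be estimated by replaying it many times and counting how often the agent reaches the goal. The sketch below recreates the environment (since it was closed above) and uses an arbitrary 100 evaluation episodes; on the slippery FrozenLake even a good greedy policy fails sometimes, so expect a rate well below 100%."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Estimate the greedy policy's success rate over many evaluation episodes\n",
"env = gym.make(\"FrozenLake-v0\")\n",
"n_eval = 100  # arbitrary number of evaluation episodes\n",
"successes = 0\n",
"for _ in range(n_eval):\n",
"    state = env.reset()\n",
"    for _ in range(max_steps):\n",
"        state, reward, done, info = env.step(np.argmax(qtable[state, :]))\n",
"        if done:\n",
"            successes += reward  # reward is 1.0 only when the goal is reached\n",
"            break\n",
"print(f\"Goal reached in {successes:.0f} of {n_eval} episodes\")"
],
"execution_count": 0,
"outputs": []
}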
]
} |