analyticsindiamagazine · June 7, 2019 10:33
diff --git a/ucb_bandits.ipynb b/ucb_bandits.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "ucb_bandits.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "YEQuX7Iue3h1",
        "colab_type": "text"
      },
      "source": [
        "# Implementing Upper Confidence Bound (UCB) for the Multi-Armed Bandit Problem"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "a55x_KGEeXYF",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Upper Confidence Bound (UCB) for a multi-armed bandit\n",
        "import math\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "# Reward table; it is indexed below as [round, machine].\n",
        "data = pd.read_csv(\"UCBbandits.csv\")\n",
        "reward_table = data.values  # hoisted so the array is not rebuilt every round\n",
        "\n",
        "machines = 5\n",
        "if data.shape[1] < machines:\n",
        "    raise ValueError(\"UCBbandits.csv has %d columns, expected at least %d\" % (data.shape[1], machines))\n",
        "\n",
        "# Cap the rounds at the rows available: indexing past the end of the\n",
        "# table would raise IndexError.\n",
        "observations = min(200, len(data))\n",
        "numbers_of_selections_of_each_machine = [0] * machines\n",
        "sums_of_rewards_for_each_machine = [0] * machines\n",
        "machines_selected = []\n",
        "total_rewards = 0\n",
        "\n",
        "for n in range(observations):\n",
        "    bandit = 0\n",
        "    # Start below every possible bound so the arg-max is still correct\n",
        "    # when all bounds are <= 0 (e.g. negative rewards).\n",
        "    max_upper_bound = -math.inf\n",
        "\n",
        "    for i in range(machines):\n",
        "        times_played = numbers_of_selections_of_each_machine[i]\n",
        "        if times_played > 0:\n",
        "            average_reward = sums_of_rewards_for_each_machine[i] / times_played\n",
        "            # Exploration bonus: shrinks as machine i is played more often.\n",
        "            di = math.sqrt(3 / 2 * math.log(n + 1) / times_played)\n",
        "            upper_bound = average_reward + di\n",
        "        else:\n",
        "            # An untried machine gets an infinite bound, so every machine\n",
        "            # is played once before any average is compared.\n",
        "            upper_bound = math.inf\n",
        "\n",
        "        # Strict '>' keeps the lowest-index machine on ties.\n",
        "        if upper_bound > max_upper_bound:\n",
        "            max_upper_bound = upper_bound\n",
        "            bandit = i\n",
        "\n",
        "    machines_selected.append(bandit)\n",
        "    numbers_of_selections_of_each_machine[bandit] += 1\n",
        "    reward = reward_table[n, bandit]\n",
        "    sums_of_rewards_for_each_machine[bandit] += reward\n",
        "    total_rewards += reward\n",
        "\n",
        "print(\"\\n\\nRewards By Machine = \", sums_of_rewards_for_each_machine)\n",
        "print(\"\\nTotal Rewards by UCB = \", total_rewards)\n",
        "print(\"\\nMachine Selected At Each Round By UCB: \\n\", machines_selected)\n",
        "\n",
        "# Bar labels follow `machines` instead of a hard-coded five-entry list.\n",
        "bandit_labels = ['B%d' % (k + 1) for k in range(machines)]\n",
        "\n",
        "# Visualizing the rewards of each machine\n",
        "plt.bar(bandit_labels, sums_of_rewards_for_each_machine)\n",
        "plt.title('MABP With UCB')\n",
        "plt.xlabel('Bandits')\n",
        "plt.ylabel('Rewards By Each Machine')\n",
        "plt.show()\n",
        "\n",
        "# Visualizing the selections of each machine\n",
        "plt.bar(bandit_labels, numbers_of_selections_of_each_machine)\n",
        "plt.title('Machines selected the most')\n",
        "plt.xlabel('Bandits')\n",
        "plt.ylabel('Number Of Times Each Bandit Was Selected To Play')\n",
        "plt.show()"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "ucb_bandits.ipynb",
	"version": "0.3.2",
	"provenance": [],
	"collapsed_sections": []
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "YEQuX7Iue3h1",
	"colab_type": "text"
	},
	"source": [
	"# Implementing Upper Confidence Bound (UCB) for the Multi-Armed Bandit Problem"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "a55x_KGEeXYF",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Upper Confidence Bound (UCB) for a multi-armed bandit\n",
	"import math\n",
	"import matplotlib.pyplot as plt\n",
	"import pandas as pd\n",
	"\n",
	"# Reward table; it is indexed below as [round, machine].\n",
	"data = pd.read_csv(\"UCBbandits.csv\")\n",
	"reward_table = data.values  # hoisted so the array is not rebuilt every round\n",
	"\n",
	"machines = 5\n",
	"if data.shape[1] < machines:\n",
	"    raise ValueError(\"UCBbandits.csv has %d columns, expected at least %d\" % (data.shape[1], machines))\n",
	"\n",
	"# Cap the rounds at the rows available: indexing past the end of the\n",
	"# table would raise IndexError.\n",
	"observations = min(200, len(data))\n",
	"numbers_of_selections_of_each_machine = [0] * machines\n",
	"sums_of_rewards_for_each_machine = [0] * machines\n",
	"machines_selected = []\n",
	"total_rewards = 0\n",
	"\n",
	"for n in range(observations):\n",
	"    bandit = 0\n",
	"    # Start below every possible bound so the arg-max is still correct\n",
	"    # when all bounds are <= 0 (e.g. negative rewards).\n",
	"    max_upper_bound = -math.inf\n",
	"\n",
	"    for i in range(machines):\n",
	"        times_played = numbers_of_selections_of_each_machine[i]\n",
	"        if times_played > 0:\n",
	"            average_reward = sums_of_rewards_for_each_machine[i] / times_played\n",
	"            # Exploration bonus: shrinks as machine i is played more often.\n",
	"            di = math.sqrt(3 / 2 * math.log(n + 1) / times_played)\n",
	"            upper_bound = average_reward + di\n",
	"        else:\n",
	"            # An untried machine gets an infinite bound, so every machine\n",
	"            # is played once before any average is compared.\n",
	"            upper_bound = math.inf\n",
	"\n",
	"        # Strict '>' keeps the lowest-index machine on ties.\n",
	"        if upper_bound > max_upper_bound:\n",
	"            max_upper_bound = upper_bound\n",
	"            bandit = i\n",
	"\n",
	"    machines_selected.append(bandit)\n",
	"    numbers_of_selections_of_each_machine[bandit] += 1\n",
	"    reward = reward_table[n, bandit]\n",
	"    sums_of_rewards_for_each_machine[bandit] += reward\n",
	"    total_rewards += reward\n",
	"\n",
	"print(\"\\n\\nRewards By Machine = \", sums_of_rewards_for_each_machine)\n",
	"print(\"\\nTotal Rewards by UCB = \", total_rewards)\n",
	"print(\"\\nMachine Selected At Each Round By UCB: \\n\", machines_selected)\n",
	"\n",
	"# Bar labels follow `machines` instead of a hard-coded five-entry list.\n",
	"bandit_labels = ['B%d' % (k + 1) for k in range(machines)]\n",
	"\n",
	"# Visualizing the rewards of each machine\n",
	"plt.bar(bandit_labels, sums_of_rewards_for_each_machine)\n",
	"plt.title('MABP With UCB')\n",
	"plt.xlabel('Bandits')\n",
	"plt.ylabel('Rewards By Each Machine')\n",
	"plt.show()\n",
	"\n",
	"# Visualizing the selections of each machine\n",
	"plt.bar(bandit_labels, numbers_of_selections_of_each_machine)\n",
	"plt.title('Machines selected the most')\n",
	"plt.xlabel('Bandits')\n",
	"plt.ylabel('Number Of Times Each Bandit Was Selected To Play')\n",
	"plt.show()"
	],
	"execution_count": 0,
	"outputs": []
	}
	]
	}