Skip to content

Instantly share code, notes, and snippets.

@analyticsindiamagazine
Last active June 7, 2019 10:33
Show Gist options
  • Save analyticsindiamagazine/d65442a50e09420ed0f130b09afcb904 to your computer and use it in GitHub Desktop.
Save analyticsindiamagazine/d65442a50e09420ed0f130b09afcb904 to your computer and use it in GitHub Desktop.
Implementing UCB in Python
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "ucb_bandits.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "YEQuX7Iue3h1",
"colab_type": "text"
},
"source": [
"# Implementing Upper Confidence Bound "
]
},
{
"cell_type": "code",
"metadata": {
"id": "a55x_KGEeXYF",
"colab_type": "code",
"colab": {}
},
"source": [
"# Upper Confidence Bound\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"data = pd.read_csv(\"UCBbandits.csv\")\n",
"\n",
"observations = 200\n",
"machines = 5\n",
"numbers_of_selections_of_each_machine = [0] * machines\n",
"sums_of_rewards_for_each_machine = [0] * machines\n",
"machines_selected = []\n",
"total_rewards = 0\n",
"\n",
"for n in range(observations):\n",
" bandit = 0\n",
" max_upper_bound = 0\n",
" \n",
" for i in range(machines):\n",
" if (numbers_of_selections_of_each_machine[i] > 0): \n",
" average_reward = sums_of_rewards_for_each_machine[i] / numbers_of_selections_of_each_machine[i] \n",
" di = math.sqrt(3/2 * math.log(n + 1) / numbers_of_selections_of_each_machine[i]) \n",
" upper_bound = average_reward + di\n",
" \n",
" else:\n",
" upper_bound = 1e400\n",
" \n",
" if upper_bound > max_upper_bound:\n",
" max_upper_bound = upper_bound\n",
" bandit = i\n",
" \n",
" machines_selected.append(bandit) \n",
" numbers_of_selections_of_each_machine[bandit] = numbers_of_selections_of_each_machine[bandit] + 1\n",
" reward = data.values[n, bandit]\n",
" sums_of_rewards_for_each_machine[bandit] = sums_of_rewards_for_each_machine[bandit] + reward\n",
" total_rewards = total_rewards + reward\n",
"\n",
"print(\"\\n\\nRewards By Machine = \", sums_of_rewards_for_each_machine)\n",
"print(\"\\nTotal Rewards by UCB = \", total_rewards)\n",
"print(\"\\nMachine Selected At Each Round By UCB: \\n\", machines_selected)\n",
"\n",
"\n",
"#Visualizing the rewards of each machine\n",
"plt.bar(['B1','B2','B3','B4','B5'],sums_of_rewards_for_each_machine)\n",
"plt.title('MABP With UCB')\n",
"plt.xlabel('Bandits')\n",
"plt.ylabel('Rewards By Each Machine')\n",
"plt.show()\n",
"\n",
"#Visualizing the selections of each machine\n",
"plt.bar(['B1','B2','B3','B4','B5'],numbers_of_selections_of_each_machine)\n",
"plt.title('Machines selected the most')\n",
"plt.xlabel('Bandits')\n",
"plt.ylabel('Number Of Times Each Bandit Was Selected To Play')\n",
"plt.show()"
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment