Last active
September 26, 2018 09:47
-
-
Save conormm/6d72fea77169d9b21c91269868d6a83d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from IPython.core.display import display, HTML

# Notebook presentation setup: seaborn grid style, inline figures,
# and a wider notebook container for the 6-panel plot at the bottom.
sns.set_style("whitegrid")
get_ipython().run_line_magic('matplotlib', 'inline')
display(HTML("<style>.container { width:80% !important; }</style>"))
class Environment:
    """A k-armed Bernoulli bandit that drives an agent through n_trials pulls.

    Each variant pays out 1 with its configured probability and 0 otherwise.
    """

    def __init__(self, variants, payouts, n_trials):
        # variants: identifiers of the arms; payouts: per-arm success
        # probability; n_trials: number of pulls to simulate.
        self.variants = variants
        self.payouts = payouts
        self.n_trials = n_trials
        self.total_reward = 0
        self.n_k = len(variants)
        self.shape = (self.n_k, n_trials)

    def run(self, agent):
        """Run the simulation with the agent.

        agent must be a class with choose_k and update methods.
        """
        for _ in range(self.n_trials):
            arm = agent.choose_k()                               # agent picks an arm
            payoff = np.random.binomial(1, p=self.payouts[arm])  # environment pays out
            agent.reward = payoff                                # agent observes reward
            agent.update()                                       # agent updates beliefs
            self.total_reward += payoff
        agent.collect_data()
        return self.total_reward
class ThompsonSampler:
    """Thompson-sampling agent for a Bernoulli bandit.

    Maintains an independent Beta(a, b) posterior per arm; on each trial it
    samples one payout estimate per arm and plays the arm with the highest
    sample, which naturally balances exploration and exploitation.
    """

    def __init__(self, env):
        self.env = env
        self.n_samples = 100                   # cached posterior samples per arm
        self.shape = (env.n_k, self.n_samples)
        self.variants = env.variants
        self.n_trials = env.n_trials
        self.payouts = env.payouts
        self.ad_i = np.zeros(env.n_trials)     # arm chosen at trial i
        self.r_i = np.zeros(env.n_trials)      # reward observed at trial i
        self.regret_i = np.zeros(env.n_trials) # regret proxy at trial i
        self.total_reward = 0
        # Beta(1, 1) prior == Uniform(0, 1), so the sample cache starts uniform.
        self.a = np.ones(env.n_k)
        self.b = np.ones(env.n_k)
        self.beta_post = np.random.uniform(0, 1, size=self.shape)
        self.thetam = np.zeros(env.n_k)        # this round's sampled theta per arm
        self.data = None
        self.reward = 0
        self.k = 0                             # last chosen arm
        self.i = 0                             # trial counter

    def choose_k(self):
        """Sample one payout estimate per arm and return the arm with the
        highest sample (Thompson sampling)."""
        # Only the last-played arm's (a, b) changed since the previous round,
        # so refresh just that arm's sample cache. Draw exactly n_samples —
        # the original drew a full (n_k, n_samples) grid via size=self.shape
        # and discarded all but one row.
        self.beta_post[self.k, :] = np.random.beta(
            self.a[self.k], self.b[self.k], size=self.n_samples
        )
        # Use a local loop variable; the original shadowed self.k here,
        # mutating instance state mid-loop for no benefit.
        for arm in range(self.env.n_k):
            # One random draw from the cached posterior samples: arms with
            # wider (more uncertain) posteriors retain a chance of winning,
            # which is what drives exploration.
            self.thetam[arm] = np.random.choice(self.beta_post[arm, :])
        # Play the arm with the highest sampled payout probability.
        # NOTE(review): assumes variants are the indices 0..n_k-1 (true for
        # the driver script in this file) since thetam is later indexed by k.
        self.k = self.variants[np.argmax(self.thetam)]
        return self.k

    def update(self):
        """Fold the observed reward into the chosen arm's posterior and
        record per-trial bookkeeping."""
        # Regret proxy: best posterior sample anywhere minus the chosen
        # arm's sampled value this round.
        self.regret_i[self.i] = np.max(self.beta_post) - self.thetam[self.k]
        # Conjugate Beta-Bernoulli update: (a, b) += (r, 1 - r),
        # i.e. b only increments on a miss (1 - 0 = 1, 1 - 1 = 0).
        self.a[self.k] += self.reward
        self.b[self.k] += 1 - self.reward
        self.total_reward += self.reward
        self.ad_i[self.i] = self.k
        self.r_i[self.i] = self.reward
        self.i += 1

    def collect_data(self):
        """Assemble the per-trial history into a DataFrame."""
        self.data = pd.DataFrame(dict(ad=self.ad_i, reward=self.r_i, regret=self.regret_i))
# ---------------------------------------------------------------------------
# Driver: run the Thompson sampler against an 11-armed Bernoulli bandit and
# visualise how the per-arm Beta posteriors sharpen as the run length grows.
# ---------------------------------------------------------------------------
machines = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
payouts = [0.023, 0.001, 0.029, 0.001, 0.002, 0.04, 0.0234, 0.002, 0.01, 0.0121, .3]

en = Environment(machines, payouts, 10000)
tsa = ThompsonSampler(env=en)
en.run(agent=tsa)


def _plot_posteriors(sampler, subplot_pos, title):
    """Overlay every variant's cached posterior samples on one subplot."""
    plt.subplot(subplot_pos)
    for i in range(len(machines)):
        sns.distplot(sampler.beta_post[i], hist=False, label=str(i))
    plt.title(title)
    plt.legend()


plt.figure(figsize=(22, 14))

# Plot 1: an untrained sampler — beta_post is still the uniform prior.
tsa = ThompsonSampler(env=Environment(machines, payouts, 10))
_plot_posteriors(tsa, 231, "Prior distribution for each variant (uniform between 0 and 1)")

# Plots 2-6: posteriors after increasingly long runs.
# BUG FIX: the original assigned `n_rounds = X` AFTER running each
# simulation, so every panel actually showed the previous round count
# (the "after 500" panel had run 0 trials, "after 1000" had run 500, ...).
# Assign the round count before building the environment.
for subplot_pos, n_rounds in zip((232, 233, 234, 235, 236),
                                 (500, 1000, 5000, 10000, 20000)):
    en = Environment(machines, payouts, n_rounds)
    tsa = ThompsonSampler(env=en)
    en.run(agent=tsa)
    _plot_posteriors(tsa, subplot_pos, f"Beta distributions after {n_rounds}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment