Created
September 21, 2018 16:38
-
-
Save conormm/d5149a6e2191be6403e8a954012b042c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8

# In[150]:

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

# The %matplotlib magic is only available inside IPython/Jupyter, where
# get_ipython() is injected into the namespace. Skip it when this exported
# notebook is executed as a plain Python script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
# In[25]:

# Four slot machines, identified by index. Each one pays a reward of 1 with
# its own probability (the matching entry of `payoffs`) and 0 otherwise.
machines = [0, 1, 2, 3]
payoffs = [0.07, 0.11, 0.22, 0.24]
# In[26]:

# Baseline strategy: play a uniformly random machine on every trial and
# accumulate the total reward.
n_payouts_random = 0
for _ in range(10000):
    # Draw from the shared `machines` list rather than a duplicated literal.
    machine = np.random.choice(machines)
    # Bernoulli payout: 1 with probability payoffs[machine], otherwise 0.
    m = np.random.binomial(1, p=payoffs[machine])
    n_payouts_random += m
print(f"Sum of the reward is: {n_payouts_random}")
# In[275]:

# Epsilon-greedy bandit. There are more efficient ways to do this.
#
# Phase 1 (the first `n_learning` trials): play uniformly random machines to
# collect initial payout statistics.
# Phase 2: play the machine with the highest estimated payout probability,
# except that with probability `e` a random machine is played instead.
n_learning = 2000
payoffs = [0.07, 0.11, 0.10, 0.08]
e = 0.02             # exploration probability: 2% of the trials
n_trials = 10000
update_every = 100   # refresh the payout estimates once per this many trials

n_machines = len(payoffs)
successes = np.zeros(n_machines)   # payouts observed per machine
m_chosen = np.zeros(n_machines)    # number of times each machine was played
m_prob = np.zeros(n_machines)      # estimated payout probability per machine
n_payouts_greedy = 0
regret = np.zeros(n_trials)

for i in range(n_trials):
    payoffs[3] = .15 if i % 3 == 0 else .007  # add variability to payouts

    # Explore during the learning phase and on an epsilon fraction of the
    # later trials; otherwise exploit the current best estimate.
    if i < n_learning or np.random.uniform(0, 1) < e:
        machine = np.random.randint(n_machines)
    else:
        machine = np.argmax(m_prob)

    # Bernoulli payout: 1 with probability payoffs[machine], otherwise 0.
    m = np.random.binomial(1, p=payoffs[machine])

    # Record every outcome so the estimates are built from all the data.
    successes[machine] += m
    m_chosen[machine] += 1
    n_payouts_greedy += m

    # Every `update_every` trials recompute the estimates. Machines that have
    # not been played yet keep their previous estimate, which avoids 0/0.
    if (i + 1) % update_every == 0:
        played = m_chosen > 0
        m_prob[played] = successes[played] / m_chosen[played]

    # Expected regret of this choice: best true payout probability on this
    # trial minus the true payout probability of the machine played.
    regret[i] = max(payoffs) - payoffs[machine]

print(f"Sum of the reward is: {n_payouts_greedy}")
# In[248]:

# This cell used to print the Thompson-sampling Beta parameters `a` and `b`.
# In file order it sits before the cell that first assigns them, so running
# the file top to bottom raised NameError here. The same two values are
# printed at the end of the Thompson-sampling cell (In[277]), so nothing is
# printed at this point.
# In[277]:

# Thompson sampling with a Beta posterior per machine.
#
# Each machine's payout probability has a Beta(a, b) posterior that starts at
# Beta(1, 1). On every trial one value is sampled from each machine's
# posterior, the machine with the largest sampled value is played, and its
# posterior is updated with the observed reward.
payoffs = [0.07, 0.11, 0.10, 0.08]
n_trials = 10000
shape = (4, 200)  # (number of machines, posterior samples kept per machine)
n_machines, n_samples = shape
machine_idx = np.arange(n_machines)

m = 0
a = np.ones(n_machines)  # 1 + number of payouts observed per machine
b = np.ones(n_machines)  # 1 + number of non-payouts observed per machine
thetam = np.zeros(n_machines)

# priors for machine payout distribution - uniformly distributed
# (Beta(1, 1) is the uniform distribution on [0, 1]).
beta_post = np.random.uniform(0, 1, size=shape)
regret = np.zeros(n_trials)
total_reward = 0

for i in range(n_trials):
    # Only machine m was updated on the previous trial, so only its stored
    # posterior samples need to be redrawn.
    beta_post[m, :] = np.random.beta(a[m], b[m], size=n_samples)
    payoffs[3] = .15 if i % 3 == 0 else .007  # add variability to payouts

    # Sample from the posterior (this is the Thompson sampling approach):
    # pick one stored sample per machine, uniformly at random. This leads to
    # more exploration than using the posterior mean a / (a + b), because a
    # machine with a more uncertain posterior can still produce the largest
    # sampled value.
    thetam = beta_post[machine_idx, np.random.randint(n_samples, size=n_machines)]

    # select machine with highest sampled p of payout
    m = int(np.argmax(thetam))

    # play machine - payout is 1 with probability payoffs[m], otherwise 0
    reward = np.random.binomial(1, p=payoffs[m])

    # Expected regret of this choice: best true payout probability on this
    # trial minus the true payout probability of the machine played.
    regret[i] = max(payoffs) - payoffs[m]

    # update dist: (a, b) = (a, b) + (r, 1 - r)
    a[m] += reward
    b[m] += 1 - reward
    total_reward += reward

# Index of the last machine; the inspection cells at the end of the file
# read `k` to look at that machine's posterior samples.
k = n_machines - 1

print(total_reward)
print(a)
print(b)
# In[278]:

# Visualise the stored posterior samples of every machine.
plt.figure(figsize=(14, 4))

# Left panel: the raw samples of each machine, in storage order.
plt.subplot(121)
for i, samples in enumerate(beta_post):
    plt.plot(samples, alpha=.4, label=i)
plt.legend()  # the labels are only shown once a legend is drawn

# Right panel: kernel density estimate of each machine's samples.
# kdeplot draws the same curve as the deprecated distplot(..., hist=False).
plt.subplot(122)
for i, samples in enumerate(beta_post):
    sns.kdeplot(samples, label=i)
plt.legend()
# In[210]:

# Inspection cells. A bare expression is only displayed by a notebook, so
# each value is printed explicitly to make it visible in a plain script too.

# One uniformly random pick from the stored posterior samples of machine k.
print(np.random.choice(beta_post[k, :]))

# In[241]:

print(b)

# In[242]:

print(a)

# In[211]:

# All stored posterior samples of machine k.
print(beta_post[k, :])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment