Skip to content

Instantly share code, notes, and snippets.

@eigenfoo
Last active March 6, 2022 17:25
Show Gist options
  • Save eigenfoo/e9a9933d94524e6dee717276c6b6f732 to your computer and use it in GitHub Desktop.
Save eigenfoo/e9a9933d94524e6dee717276c6b6f732 to your computer and use it in GitHub Desktop.
Solving a Poisson multi-armed bandit with Thompson sampling
def make_bandits(params):
    """Build a multi-armed bandit whose arms pay Poisson-distributed rewards.

    Args:
        params: Indexable, sized collection of Poisson rates, one per arm.
            ``params[arm]`` is passed as ``lam`` to ``np.random.poisson``.

    Returns:
        A ``(pull, num_bandits)`` tuple. ``pull(arm, size=None)`` returns an
        endless generator; each ``next()`` on it draws a fresh reward for
        ``arm`` (a scalar when ``size`` is ``None``, otherwise an array of
        that shape). ``num_bandits`` is ``len(params)``.
    """
    # No module-level NumPy import is visible in this file, so bring it into
    # scope here; the nested generator picks it up through the closure.
    import numpy as np

    def pull(arm, size=None):
        """Yield Poisson rewards for ``arm`` forever."""
        while True:
            # Poisson distributed rewards
            reward = np.random.poisson(lam=params[arm], size=size)
            yield reward

    return pull, len(params)
def bayesian_strategy(pull, num_bandits):
    """Play a Poisson bandit with Thompson sampling, one round per ``next()``.

    Each arm's rate has a Gamma belief with shape ``num_rewards[arm]`` and
    scale ``1 / num_trials[arm]``; both start at 1. Every round draws one
    sample per arm from these Gammas, pulls the arm with the largest sample,
    and folds the observed reward into that arm's parameters.

    Args:
        pull: Callable ``pull(arm)`` returning an iterator whose next item is
            a reward for ``arm`` (as produced by ``make_bandits``).
        num_bandits: Number of arms; must be at least 1.

    Yields:
        ``(choice, reward, num_rewards, num_trials)`` after each round, where
        the two arrays are snapshots of the Gamma parameters taken after the
        update.

    Raises:
        ValueError: On the first ``next()`` if ``num_bandits`` is less than 1.
    """
    # No module-level NumPy import is visible in this file, so bring it into
    # scope here.
    import numpy as np

    if num_bandits < 1:
        raise ValueError("num_bandits must be at least 1")

    num_rewards = np.ones(num_bandits)
    num_trials = np.ones(num_bandits)

    while True:
        # Sample from the bandits' posteriors, and choose largest
        samples = np.random.gamma(num_rewards, scale=1 / num_trials)
        choice = np.argmax(samples)

        # Sample the chosen bandit
        reward = next(pull(choice))

        # Update
        num_rewards[choice] += reward
        num_trials[choice] += 1

        # Yield copies: handing out the live arrays would make every result a
        # caller has stored change retroactively on the next round.
        yield choice, reward, num_rewards.copy(), num_trials.copy()
if __name__ == '__main__':
    # Three arms with Poisson rates 4.0, 4.5 and 5.0.
    pull, num_bandits = make_bandits([4.0, 4.5, 5.0])
    play = bayesian_strategy(pull, num_bandits)

    # Run 100 rounds; after the loop the names below hold the outcome of the
    # final round.
    for _ in range(100):
        choice, reward, num_rewards, num_trials = next(play)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment