eigenfoo · March 6, 2022 17:25
diff --git a/gamma_poisson_bandit.py b/gamma_poisson_bandit.py
 def make_bandits(params):
     """Build a Poisson multi-armed bandit from per-arm parameters.

     Parameters
     ----------
     params : indexable collection
         ``params[arm]`` is handed to ``np.random.poisson`` as ``lam``.

     Returns
     -------
     pull : callable
         ``pull(arm, size=None)`` creates an endless generator of rewards
         for ``arm``; ``size`` is forwarded unchanged to NumPy.
     num_bandits : int
         ``len(params)``.
     """
     def pull(arm, size=None):
         """Yield Poisson-distributed rewards for ``arm`` without end."""
         # ``params[arm]`` is looked up on every draw rather than once, so
         # the generator always uses the current contents of ``params``.
         while True:
             yield np.random.poisson(lam=params[arm], size=size)

     num_arms = len(params)
     return pull, num_arms


 def bayesian_strategy(pull, num_bandits):
     """Play a bandit forever by sampling from per-arm Gamma beliefs.

     Every arm has a Gamma belief whose shape is stored in ``num_rewards``
     and whose rate is stored in ``num_trials``; both start at 1.  On each
     step one value per arm is drawn with
     ``np.random.gamma(shape, scale=1 / rate)``, the arm with the largest
     draw is pulled, and that arm's shape grows by the observed reward
     while its rate grows by one.

     Parameters
     ----------
     pull : callable
         ``pull(arm)`` must return an iterator; its first item is taken as
         the reward for this step.
     num_bandits : int
         Number of arms.

     Yields
     ------
     tuple
         ``(choice, reward, num_rewards, num_trials)`` after each update.
         The two arrays are copies, so a caller may keep them as a history
         without later steps rewriting them.
     """
     num_rewards = np.ones(num_bandits)
     num_trials = np.ones(num_bandits)

     while True:
         # Sample from every arm's current belief (this is the initial
         # prior only on the very first step) and choose the largest.
         samples = np.random.gamma(num_rewards, scale=1 / num_trials)
         choice = np.argmax(samples)

         # Sample the chosen bandit
         reward = next(pull(choice))

         # Update
         num_rewards[choice] += reward
         num_trials[choice] += 1

         # Hand out snapshots: yielding the live arrays would let the next
         # in-place update change tuples the caller has already stored.
         yield choice, reward, num_rewards.copy(), num_trials.copy()
        
        
 if __name__ == '__main__':
     # Three arms; the list is handed to make_bandits as the per-arm
     # parameters.
     pull, num_bandits = make_bandits([4.0, 4.5, 5.0])
     play = bayesian_strategy(pull, num_bandits)

     # Advance the strategy exactly 100 steps.  zip() consults range(100)
     # first, so it stops before requesting a 101st item from ``play``.
     for _, (choice, reward, num_rewards, num_trials) in zip(range(100), play):
         pass
	def make_bandits(params):
		"""Build a Poisson multi-armed bandit from per-arm parameters.

		``params[arm]`` is passed to ``np.random.poisson`` as ``lam``.

		Returns a pair ``(pull, num_bandits)``: ``pull(arm, size=None)``
		creates an endless generator of rewards for ``arm`` (``size`` is
		forwarded unchanged to NumPy) and ``num_bandits`` is ``len(params)``.
		"""
		def pull(arm, size=None):
			"""Yield Poisson-distributed rewards for ``arm`` without end."""
			while True:
				# Poisson distributed rewards
				reward = np.random.poisson(lam=params[arm], size=size)
				yield reward

		return pull, len(params)


	def bayesian_strategy(pull, num_bandits):
		"""Play a bandit forever by sampling from per-arm Gamma beliefs.

		Every arm has a Gamma belief whose shape is stored in
		``num_rewards`` and whose rate is stored in ``num_trials``; both
		start at 1.  On each step one value per arm is drawn with
		``np.random.gamma(shape, scale=1 / rate)``, the arm with the largest
		draw is pulled, and that arm's shape grows by the observed reward
		while its rate grows by one.

		``pull(arm)`` must return an iterator; its first item is taken as
		the reward.  ``num_bandits`` is the number of arms.

		Yields ``(choice, reward, num_rewards, num_trials)`` after each
		update.  The two arrays are copies, so a caller may keep them as a
		history without later steps rewriting them.
		"""
		num_rewards = np.ones(num_bandits)
		num_trials = np.ones(num_bandits)

		while True:
			# Sample from every arm's current belief (this is the initial
			# prior only on the very first step) and choose the largest.
			samples = np.random.gamma(num_rewards, scale=1 / num_trials)
			choice = np.argmax(samples)

			# Sample the chosen bandit
			reward = next(pull(choice))

			# Update
			num_rewards[choice] += reward
			num_trials[choice] += 1

			# Hand out snapshots: yielding the live arrays would let the
			# next in-place update change tuples the caller already stored.
			yield choice, reward, num_rewards.copy(), num_trials.copy()


	if __name__ == '__main__':
		# Three arms; the list is handed to make_bandits as the per-arm
		# parameters.
		pull, num_bandits = make_bandits([4.0, 4.5, 5.0])
		play = bayesian_strategy(pull, num_bandits)

		# Advance the strategy 100 steps; the yielded values are not used
		# further here.
		for _ in range(100):
			choice, reward, num_rewards, num_trials = next(play)