Created
July 10, 2020 06:44
-
-
Save fbparis/dcda3f98b4c9351415388d5b2dba8c29 to your computer and use it in GitHub Desktop.
A very good multi-armed (K-armed) bandit solver that needs no tuning parameters!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class RandomUCI:
    """Random Upper Confidence Interval agent for a multi-armed bandit.

    Each arm keeps a running mean, a running spread accumulator and a pull
    counter.  An action is chosen by adding a randomly scaled spread bonus
    to every arm's mean and taking the arm with the highest score.

    The wrapped ``bandit`` object must expose ``arm_count`` and a
    ``get_reward_regret(arm)`` method returning a ``(reward, regret)`` pair.

    NOTE(review): the 0.5 / 0.25 priors look like they assume rewards in
    [0, 1] -- confirm against the bandit implementation.
    """

    def __init__(self, bandit):
        """Store the bandit and initialise the per-arm statistics."""
        self.bandit = bandit
        self.arm_count = bandit.arm_count
        # Every arm starts as if it had been pulled once, with mean 0.5
        # and a spread accumulator of 0.25.
        self.mean = np.ones(self.arm_count) / 2
        self.variance = np.ones(self.arm_count) / 4
        self.N = np.ones(self.arm_count)

    @staticmethod
    def name():
        """Return the display name of this agent."""
        return 'Random-UCI'

    def get_reward_regret(self, arm):
        """Pull ``arm`` on the bandit, learn from the reward, return both values.

        Returns the ``(reward, regret)`` pair reported by the bandit.
        """
        reward, regret = self.bandit.get_reward_regret(arm)
        self._update_params(arm, reward)
        return reward, regret

    def get_action(self):
        """Return the index of the arm with the highest randomised score.

        A single uniform random factor in [0, 1) is drawn per call and is
        shared by all arms; it scales the bonus ``sqrt(variance) / N``.
        """
        # ``variance`` holds an un-normalised sum of squared deviations, so
        # sqrt(variance) / N == sqrt((variance / N) / N), i.e. it shrinks
        # like a standard error as an arm is pulled more often.
        bonus = np.random.random() * np.sqrt(self.variance) / self.N
        return np.argmax(self.mean + bonus)

    def _update_params(self, arm, reward):
        """Fold ``reward`` into the running statistics of ``arm``.

        Incremental (Welford-style) update: the mean is moved towards the
        reward, and ``variance`` grows by ``delta * (reward - new_mean)``.
        Despite its name, ``variance`` is never divided by the sample count.
        """
        self.N[arm] += 1
        delta = reward - self.mean[arm]
        self.mean[arm] += delta / self.N[arm]
        # Uses the *updated* mean on purpose: old-delta times new-delta.
        self.variance[arm] += delta * (reward - self.mean[arm])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment