Last active
September 28, 2021 13:40
-
-
Save comckay/42bab0272cd4a353f170e881da56cbd5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List | |
import numpy as np | |
class LinUCB:
    """LinUCB-style contextual bandit that chooses among named models.

    Every model (arm) owns an ``n_features x n_features`` covariance matrix,
    initialised to the identity, and a length-``n_features`` reward vector,
    initialised to zeros.  ``select_model`` picks an arm for a context and
    immediately folds that context into the arm's covariance matrix;
    ``reward_model`` later folds the observed reward into the arm's reward
    vector.

    Contexts are expected to be 1-D arrays of length ``n_features``.
    """

    def __init__(self, models: List[str], n_features: int, alpha: float):
        """Set up per-model state.

        Args:
            models: Names of the candidate models; list order fixes the
                index of each model in the internal arrays.
            n_features: Length of the context vectors.
            alpha: Multiplier applied to the confidence-width term when
                scoring models.
        """
        self.models, self.n_models = models, len(models)
        self.alpha = alpha
        # Number of times each model has been returned by ``select_model``.
        self.model_tries = np.zeros((self.n_models))
        # One identity matrix per model, shape (n_models, n_features, n_features).
        self.covariance_matrices = np.tile(
            np.identity(n_features), (self.n_models, 1, 1)
        )
        # One accumulated reward-weighted context per model.
        self.reward_matrix = np.zeros((self.n_models, n_features))

    def _increment_model_tries(self, model: str) -> None:
        """Record one more selection of ``model``.

        Raises:
            ValueError: If ``model`` is not one of the configured models.
        """
        self.model_tries[self.models.index(model)] += 1

    def _update_covariance_matrix(self, model: str, context: np.ndarray) -> None:
        """Add the rank-one term ``context contextᵀ`` to the model's matrix.

        Raises:
            ValueError: If ``model`` is not one of the configured models.
        """
        model_index = self.models.index(model)
        # ``np.dot`` of a 1-D vector with itself is a scalar (the squared
        # norm) and would be broadcast onto every matrix entry; the update
        # needs the outer product instead.
        self.covariance_matrices[model_index] += np.outer(context, context)

    def _get_model_with_max_ucb(self, context: np.ndarray) -> str:
        """Return the model whose upper confidence bound is largest.

        For each model the score is ``theta . context`` plus
        ``alpha * sqrt(contextᵀ A⁻¹ context)``, where ``A`` is the model's
        covariance matrix and ``theta = A⁻¹ b`` with ``b`` the model's
        reward vector.  NaN scores are ignored when taking the maximum.
        """
        # ``np.linalg.inv`` inverts the whole stack of matrices in one call.
        inverse_covariances = np.linalg.inv(self.covariance_matrices)
        ucb_scores = []
        for inverse_covariance, reward_vector in zip(
            inverse_covariances, self.reward_matrix
        ):
            coefficients = np.dot(inverse_covariance, reward_vector)
            expected_reward = np.dot(coefficients, context)
            confidence_width = self.alpha * np.sqrt(
                np.dot(np.dot(context.T, inverse_covariance), context)
            )
            ucb_scores.append(expected_reward + confidence_width)
        return self.models[np.nanargmax(ucb_scores)]

    def select_model(self, context: np.ndarray) -> str:
        """Choose a model for ``context`` and record the selection.

        Models that have never been selected are returned first, in list
        order.  Once every model has been tried at least once, the model
        with the highest upper confidence bound is returned.  In both cases
        the chosen model's try counter and covariance matrix are updated
        before returning.
        """
        untested_models = np.nonzero(self.model_tries == 0)[0]
        if untested_models.size == 0:
            chosen_model = self._get_model_with_max_ucb(context)
        else:
            chosen_model = self.models[untested_models[0]]
        self._increment_model_tries(chosen_model)
        self._update_covariance_matrix(chosen_model, context)
        return chosen_model

    def reward_model(
        self, model: str, context: np.ndarray, reward: float = 1.0
    ) -> None:
        """Credit ``model`` with ``reward`` for the given ``context``.

        Adds ``reward * context`` to the model's reward vector.

        Raises:
            ValueError: If ``model`` is not one of the configured models.
        """
        if model not in self.models:
            raise ValueError(f"model {model} not recognized")
        model_index = self.models.index(model)
        self.reward_matrix[model_index] += reward * context
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment