Created
January 10, 2020 20:36
-
-
Save thismlguy/a7f085f4e2b5985e69bc97728dcca2b5 to your computer and use it in GitHub Desktop.
Offline Estimates of Online Metrics using Causal Inference
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ad inventory: one id string per ad.
num_ads = 3
ads = np.asarray(list(map("ad_{}".format, range(num_ads))))

# Interaction probabilities available in every context; each context gets its
# own random assignment of these values to the ads (position i <-> ads[i]).
ad_interaction_priors = np.asarray([0.1, 0.3, 0.6])
user_context_priors = {
    context_id: np.random.permutation(ad_interaction_priors)
    for context_id in user_contexts
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use the same context ids as the logged data.
df_new_models_matching = df_random_serving.copy()


def sample_ad_for_context_n_model(context_id, model_priors):
    """Sample the ad a candidate model would serve in the given context.

    Args:
        context_id: Key into the module-level ``user_context_priors`` mapping.
        model_priors: Array of selection probabilities indexed by the rank of
            an ad's interaction prior within the context (index 0 is the ad
            with the lowest interaction prior, the last index the highest).

    Returns:
        One element of the module-level ``ads`` array.

    Raises:
        KeyError: If ``context_id`` has no entry in ``user_context_priors``.
    """
    # Index directly (instead of ``.get``) so an unknown context fails here
    # with a clear KeyError rather than passing ``None`` into the ranking.
    interaction_priors = user_context_priors[context_id]
    # argsort of argsort yields the rank of every ad's interaction prior;
    # indexing by rank puts the model's probabilities into ad order.
    ad_ranks = np.argsort(np.argsort(interaction_priors))
    selection_priors = model_priors[ad_ranks]
    # Draw a single ad according to the model's selection probabilities.
    return np.random.choice(ads, None, replace=False, p=selection_priors)
# For every candidate model, record which ad it would have served in each
# logged context; the result is stored in a column named after the model.
for policy_name, model_prior in zip(new_model_names, new_model_priors):
    sampled_ads = df_new_models_matching["context_id"].apply(
        lambda context_id: sample_ad_for_context_n_model(context_id, model_prior)
    )
    df_new_models_matching.loc[:, policy_name] = sampled_ads

# Notebook preview of a few random rows.
df_new_models_matching.sample(5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Match and estimate: a logged row counts towards a model's estimate only when
# the logged ad equals the ad that model picked for the same row.
# The logging policy served ads uniformly at random, so the propensity of any
# logged ad is exactly 1 / len(ads); a rounded constant such as 0.333 would
# inflate every estimate slightly.
logging_propensity = 1.0 / len(ads)
estimates_matching = []
for model in new_model_names:
    matching_mask = (
        df_new_models_matching["selected_ad"] == df_new_models_matching[model].values
    ).astype(int)
    # Inverse-propensity weighted interactions, averaged over all logged rows.
    estimate = (
        df_new_models_matching["user_interaction"] * matching_mask / logging_propensity
    ).sum() / df_new_models_matching.shape[0]
    estimates_matching.append(estimate)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare each model's expected interaction rate with the estimate obtained
# from the logged data by matching.
plt.figure(figsize=(10, 5))
plt.plot(expected_interaction_rates, label="expected rate")
# One tick per model, derived from the model list instead of a fixed count.
plt.xticks(range(len(new_model_names)), labels=new_model_names, rotation=30)
plt.plot(estimates_matching, label="actual rate")
plt.legend()
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use the same context ids as the logged data.
df_new_models_weighting = df_random_serving.copy()


def sample_prior_for_context_n_model(context_id, model_priors, selected_ad):
    """Return the probability with which a model would serve the logged ad.

    Args:
        context_id: Key into the module-level ``user_context_priors`` mapping.
        model_priors: Array of selection probabilities indexed by the rank of
            an ad's interaction prior within the context (index 0 is the ad
            with the lowest interaction prior, the last index the highest).
        selected_ad: The ad id that was logged; must be an element of the
            module-level ``ads`` array.

    Returns:
        The model's selection probability for ``selected_ad`` in this context.

    Raises:
        KeyError: If ``context_id`` has no entry in ``user_context_priors``.
        ValueError: If ``selected_ad`` is not present in ``ads``.
    """
    # Index directly (instead of ``.get``) so an unknown context fails here
    # with a clear KeyError rather than passing ``None`` into the ranking.
    interaction_priors = user_context_priors[context_id]
    # argsort of argsort yields the rank of every ad's interaction prior;
    # indexing by rank puts the model's probabilities into ad order.
    selection_priors = model_priors[np.argsort(np.argsort(interaction_priors))]
    # Locate the logged ad with an array comparison, avoiding a fresh list
    # conversion of ``ads`` on every call.
    ad_positions = np.where(ads == selected_ad)[0]
    if ad_positions.size == 0:
        raise ValueError("{!r} is not in ads".format(selected_ad))
    return selection_priors[ad_positions[0]]
# For every candidate model, store the probability with which it would have
# served the logged ad of each row, in a column named after the model.
for model_name, model_prior in zip(new_model_names, new_model_priors):
    logged_ad_probabilities = df_new_models_weighting.apply(
        lambda row: sample_prior_for_context_n_model(
            row["context_id"], model_prior, row["selected_ad"]
        ),
        axis=1,
    )
    df_new_models_weighting.loc[:, model_name] = logged_ad_probabilities

# Notebook preview of a few random rows.
df_new_models_weighting.sample(5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Weight and estimate: every logged interaction is weighted by the ratio of
# the model's probability of serving the logged ad to the logging propensity.
# The logging policy served ads uniformly at random, so the propensity of any
# logged ad is exactly 1 / len(ads); a rounded constant such as 0.333 would
# inflate every estimate slightly.
logging_propensity = 1.0 / len(ads)
estimates_weighting = []
for model in new_model_names:
    estimate = (
        df_new_models_weighting["user_interaction"]
        * df_new_models_weighting[model]
        / logging_propensity
    ).sum() / df_new_models_weighting.shape[0]
    estimates_weighting.append(estimate)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare each model's expected interaction rate with the estimate obtained
# from the logged data by weighting.
plt.figure(figsize=(10, 5))
plt.plot(expected_interaction_rates, label="expected rate")
# One tick per model, derived from the model list instead of a fixed count.
plt.xticks(range(len(new_model_names)), labels=new_model_names, rotation=30)
plt.plot(estimates_weighting, label="actual rate")
plt.legend()
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Expected interaction rate per model: each row of selection probabilities
# (lowest to highest ranked ad) times the interaction priors as a column
# vector. This lines up because ad_interaction_priors is listed in ascending
# order. The result keeps one row per model and a single column.
interaction_prior_column = ad_interaction_priors.reshape(-1, 1)
expected_interaction_rates = new_model_priors @ interaction_prior_column
# Flattened view for notebook display only; the variable itself stays 2-D.
expected_interaction_rates.ravel()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pylab as plt | |
from uuid import uuid4 | |
%matplotlib inline |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Selection probabilities of the candidate models, one row per model.
# Columns: probability of serving the ad whose interaction prior in the
# context is lowest, middle and highest, respectively.
new_model_priors = np.array(
    [
        [0.8, 0.1, 0.1],
        [0.7, 0.2, 0.1],
        [0.6, 0.2, 0.2],
        [0.5, 0.3, 0.2],
        [0.5, 0.2, 0.3],
        [0.4, 0.3, 0.3],
        [0.4, 0.2, 0.4],
        [0.3, 0.3, 0.4],
        [0.2, 0.35, 0.45],
        [0.2, 0.2, 0.6],
    ]
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One name per row of new_model_priors: model_0, model_1, ...
new_model_names = np.asarray(
    list(map("model_{}".format, range(len(new_model_priors))))
)

# Notebook preview: model names as a leading column next to their priors.
pd.DataFrame(
    data=np.hstack([new_model_names.reshape(-1, 1), new_model_priors]),
    columns=["model_id", "prob_low", "prob_med", "prob_high"],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
num_iterations = 100000

# Empty log table; its columns are filled one at a time below.
df_random_serving = pd.DataFrame(
    columns=["log_id", "context_id", "selected_ad", "user_interaction"]
)

# A unique id for every log entry.
df_random_serving["log_id"] = [uuid4() for _ in range(num_iterations)]

# A context for every log entry, drawn with the per-context selection prior.
df_random_serving["context_id"] = np.random.choice(
    user_contexts,
    size=num_iterations,
    replace=True,
    p=user_context_selection_prior,
)

# The ad shown in that context, drawn uniformly at random (the logging policy).
df_random_serving["selected_ad"] = np.random.choice(
    ads,
    size=num_iterations,
    replace=True,
)
# For each log entry, sample whether the user interacted (1) or not (0), using
# the interaction probability assigned to the context-ad pair.
def sample_action_for_ad(context_id, ad_id):
    """Draw a 0/1 interaction for showing ``ad_id`` in ``context_id``.

    Looks up the context's priors in the module-level ``user_context_priors``,
    picks the entry at the position of ``ad_id`` within the module-level
    ``ads`` array, and returns a single Bernoulli draw with that probability.
    """
    context_priors = user_context_priors.get(context_id)
    ad_position = np.where(ads == ad_id)[0][0]
    interaction_probability = context_priors[ad_position]
    return np.random.binomial(1, interaction_probability)
# Simulate the user's response to every logged (context, ad) pair.
df_random_serving["user_interaction"] = df_random_serving.apply(
    lambda row: sample_action_for_ad(row["context_id"], row["selected_ad"]),
    axis=1,
)

# A snapshot of the data (notebook preview).
df_random_serving.sample(10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set user contexts.
num_contexts = 10000
user_contexts = np.asarray(["context_{}".format(i) for i in range(num_contexts)])


# Assign a selection prior to these contexts.
def random_normal_sample_sum_to_1(size):
    """Draw random non-negative weights that sum to 1.

    Standard-normal draws are shifted so that the smallest becomes 0 and are
    then divided by their total. As a consequence the smallest draw always
    receives a weight of exactly 0.

    Args:
        size: Number of weights to draw (passed as ``size`` to
            ``np.random.normal``).

    Returns:
        Array of weights summing to 1. When the shifted draws sum to 0 (a
        single draw, or all draws equal) uniform weights are returned.

    Raises:
        ValueError: If ``size`` requests zero elements.
    """
    sample = np.random.normal(0, 1, size)
    if sample.size == 0:
        raise ValueError("size must request at least one element")
    sample_adjusted = sample - sample.min()
    total = sample_adjusted.sum()
    if total == 0:
        # Dividing by a zero total would yield NaN weights; with nothing to
        # distinguish the draws, spread the mass uniformly instead.
        return np.full(sample.shape, 1.0 / sample.size)
    return sample_adjusted / total
# Probability of each context appearing in the logs.
user_context_selection_prior = random_normal_sample_sum_to_1(
    num_contexts
)

# Visual check of how the selection probabilities are distributed.
plt.hist(
    user_context_selection_prior,
    bins=100,
)

# Sanity check: the probabilities sum to 1 (to two decimal places).
assert user_context_selection_prior.sum().round(2) == 1.0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment