This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Project three columns from some_table, using a leading-comma column list.
select
    some_column
    , another_column
    , yet_another_column
from some_table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Analytic counterpart to the sampling estimate: a score test on two
# independent proportions. Variant counts are passed first and control counts
# second, with alternative="larger".
actual_specialness_result = score_test_proportions_2indep(
    NUM_CONVERTING_VARIANT_USERS,
    NUM_VARIANT_USERS,
    NUM_CONVERTING_CONTROL_USERS,
    NUM_CONTROL_USERS,
    alternative="larger",
)
print(f"actual 'specialness' result: {actual_specialness_result.pvalue:.2%}")
#> actual 'specialness' result: 3.83%
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fraction of the sampled differences that are at least as large as the
# difference observed in the experiment.
num_at_least_observed = (sampled_diffs >= observed_diff_in_rates).sum()
sampling_specialness_result = num_at_least_observed / sampled_diffs.shape[0]
print(f"sampled 'specialness' result: {sampling_specialness_result:.2%}")
#> sampled 'specialness' result: 3.41%
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Histogram of the sampled differences, with the observed difference marked.
plot_hist(
    sampled_diffs,
    bins=50,
    observed_rate=observed_diff_in_rates,
    title="Our range of pure randomness",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of shuffles used to build the distribution of differences.
NUM_SIMULATIONS = 10_000
sampled_diffs = sample_diffs_in_rates(all_users, NUM_CONTROL_USERS, NUM_SIMULATIONS)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@njit(parallel=True)
def sample_diffs_in_rates(all_users, num_control_users, num_simulations):
    """Return simulated variant-minus-control rate differences.

    For each simulation, ``all_users`` is shuffled in place, the first
    ``num_control_users`` entries are treated as the control group and the
    remainder as the variant group, and the difference of the two group
    means is recorded.

    Args:
        all_users: 1-D array of per-user outcomes; it is shuffled in place,
            so the caller's array is left reordered.
        num_control_users: Number of leading entries treated as control.
        num_simulations: Number of shuffles to perform.

    Returns:
        Array of length ``num_simulations`` holding one difference in rates
        (variant rate minus control rate) per shuffle.
    """
    results = np.zeros(num_simulations)
    for i in prange(num_simulations):
        # numpy random shuffling appears to be slower when using numba
        # NOTE(review): every prange iteration shuffles the same shared array
        # in place; confirm concurrent shuffles are acceptable with
        # parallel=True, or shuffle a per-iteration copy instead.
        random.shuffle(all_users)
        control_rate = all_users[:num_control_users].mean()
        # we assume the rest of the users are variant users
        variant_rate = all_users[num_control_users:].mean()
        results[i] = variant_rate - control_rate
    # Without this return the caller receives None instead of the samples.
    return results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Group sizes and number of converting users in each group.
NUM_CONTROL_USERS = 1_000_000
NUM_CONVERTING_CONTROL_USERS = 26_000
NUM_VARIANT_USERS = 1_000_000
NUM_CONVERTING_VARIANT_USERS = 26_400

# create our arrays of users
# Each user is 0.0 (did not convert) or 1.0 (converted), so the mean of the
# array is the conversion rate.
control_users = np.zeros(NUM_CONTROL_USERS)
control_users[:NUM_CONVERTING_CONTROL_USERS] = 1.0
control_conversion_rate = control_users.mean()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OBSERVED_DIFF_IN_RATES = 0.167  # this is our experiment result

# Count the simulated differences that are at least as large as the observed
# one, then report that count and its share of all samples.
num_diffs_gte_observed = (simulated_diffs_in_rates >= OBSERVED_DIFF_IN_RATES).sum()
num_samples = simulated_diffs_in_rates.shape[0]
print(
    f"{num_diffs_gte_observed:,} out of {num_samples:,} random samples show "
    f"differences in rates greater than or equal to {OBSERVED_DIFF_IN_RATES:.1%}"
)
print(
    f"percentage of random noise distribution with difference in rates "
    f"greater than or equal to {OBSERVED_DIFF_IN_RATES:.1%}: "
    f"{num_diffs_gte_observed / num_samples:.2%}"
)
#> 1,267 out of 10,000 random samples show differences in rates greater than or equal to 16.7%
#> percentage of random noise distribution with difference in rates greater than or equal to 16.7%: 12.67%
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Histogram of the simulated differences, with the observed 0.167 marked.
plot_hist(
    simulated_diffs_in_rates,
    bins=15,
    observed_rate=0.167,
    title="Our range of pure randomness",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_hist(experiment_results: np.ndarray,
              bins=100,
              observed_rate: float = None,
              title: str = None) -> None:
    """Draw a histogram of results, optionally marking an observed value.

    Args:
        experiment_results: Values to plot as a histogram.
        bins: Passed through to ``sns.histplot`` as ``bins``.
        observed_rate: If given, a red vertical line is drawn at this x
            position and a legend is added below the plot.
        title: If given and non-empty, used as the plot title.
    """
    sns.histplot(experiment_results, bins=bins)
    # Compare against None so an observed value of exactly 0.0 is still drawn;
    # a plain truthiness check would silently skip it.
    if observed_rate is not None:
        plt.axvline(observed_rate, color='r', label='Diff in rates observed in experiment')
        # The legend only has an entry when the labelled line exists.
        plt.legend(bbox_to_anchor=(0.5, -0.2), loc="lower center")
    if title:
        plt.title(title)
NewerOlder