Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Sandy4321/2ec61b5eea8687fe7bad41f148a79f50 to your computer and use it in GitHub Desktop.
Save Sandy4321/2ec61b5eea8687fe7bad41f148a79f50 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
def bootstrap_lift_samples(data, n_boot=10000, random_state=None):
    """Return bootstrap replicates of sum(test - ctl) / sum(ctl).

    Each replicate draws ``len(data)`` rows from *data* with replacement and
    computes the ratio of the summed per-row difference
    (``test_conversions - ctl_conversions``) to the summed
    ``ctl_conversions``.

    Args:
        data: DataFrame with ``test_conversions`` and ``ctl_conversions``
            columns. It is not modified.
        n_boot: Number of bootstrap replicates to draw.
        random_state: Optional integer seed. With a seed, repeated calls
            return identical replicates. ``None`` leaves the seeding to
            pandas' default behaviour for ``DataFrame.sample``.

    Returns:
        A list of ``n_boot`` Python floats, one ratio per replicate.

    Raises:
        ValueError: If *data* has no rows or ``n_boot`` is less than 1.
        ZeroDivisionError: If a resample's ``ctl_conversions`` sum to zero.
    """
    n_rows = len(data)
    if n_rows == 0:
        raise ValueError("cannot bootstrap an empty dataset")
    if n_boot < 1:
        raise ValueError("n_boot must be at least 1")

    # Resample only the two columns the statistic needs; copying every other
    # column on each of the n_boot iterations is wasted work.
    frame = pd.DataFrame({
        'diff_conversions': data['test_conversions'] - data['ctl_conversions'],
        'ctl_conversions': data['ctl_conversions'],
    })

    # One RandomState shared by all iterations: passing the bare integer to
    # every sample() call would produce the same resample n_boot times.
    rng = None if random_state is None else np.random.RandomState(random_state)

    boot_array = []
    for _ in range(n_boot):
        boot_sample = frame.sample(n=n_rows, replace=True, random_state=rng)
        boot_sample_mean = (float(boot_sample['diff_conversions'].sum())
                            / float(boot_sample['ctl_conversions'].sum()))
        boot_array.append(boot_sample_mean)
    return boot_array


def summarize_bootstrap(boot_array, sd_mult=1.64485):
    """Return the mean, standard deviation and symmetric interval of replicates.

    The interval is ``mean -/+ sd_mult * std`` with both bounds rounded to two
    decimals. The default multiplier 1.64485 is the one this script uses for
    its 90% interval (``CI_90_B``). The standard deviation is ``np.std`` with
    its default ``ddof=0``.

    Args:
        boot_array: Sequence of bootstrap replicate values.
        sd_mult: Multiplier applied to the standard deviation for the
            interval half-width.

    Returns:
        ``(boot_mean, boot_mean_std, ci)`` where ``ci`` is a
        ``(lower, upper)`` tuple of built-in floats.
    """
    boot_mean = np.mean(boot_array)
    boot_mean_std = np.std(boot_array)
    # Convert to built-in floats so the printed tuple reads "(0.05, 0.1)" on
    # every NumPy version instead of "(np.float64(0.05), np.float64(0.1))".
    ci = (round(float(boot_mean - (sd_mult * boot_mean_std)), 2),
          round(float(boot_mean + (sd_mult * boot_mean_std)), 2))
    return boot_mean, boot_mean_std, ci


if __name__ == "__main__":
    raw_test_data = pd.read_csv('medium_ppc_inc_evaluate_dataset.csv')
    # The CSV read above is available at:
    # https://drive.google.com/open?id=1IOuneJr-QFDYGsJPRjQ8ra-5C4VczeOW
    raw_test_data['diff_conversions'] = raw_test_data['test_conversions'] - raw_test_data['ctl_conversions']
    N = len(raw_test_data)  # number of rows; each resample draws this many
    sd_mult = 1.64485
    boot_array = bootstrap_lift_samples(raw_test_data, n_boot=10000)
    boot_mean, boot_mean_std, CI_90_B = summarize_bootstrap(boot_array, sd_mult)
    print("point estimator: " + str(boot_mean))
    print("std_dev: " + str(boot_mean_std))
    print("CI: " + str(CI_90_B))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment