-
-
Save dast1/57ca4f53070da4447c80a54601f705e5 to your computer and use it in GitHub Desktop.
Simple Random Sampling vs Thompson Algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Question 3 | |
# Import the libraries | |
import numpy as np | |
import pandas as pd | |
# Recreate the dataset function | |
def recreate_dataset(set_size, conversion_ratios):
    """Build a synthetic 0/1 conversion table with one column per variant.

    For variant ``i``, ``round(set_size * conversion_ratios[i])`` rows are
    drawn at random from the rows not already used by an earlier variant and
    flagged with ``1`` in column ``i``. Every other cell stays ``0``.

    Args:
        set_size: Number of rows to generate.
        conversion_ratios: One ratio per variant; the fraction of
            ``set_size`` rows to flag for that variant.

    Returns:
        A list of ``set_size`` rows, each a list of
        ``len(conversion_ratios)`` integers (0 or 1).

    Note:
        If the ratios add up to more than 1, the pool of unused rows runs
        out and later variants receive fewer rows than requested.
    """
    import random

    dataset = [[0] * len(conversion_ratios) for _ in range(set_size)]
    available_idx = list(range(set_size))
    for column, conversion_ratio in enumerate(conversion_ratios):
        # Randomly shuffle the remaining row indices and take the required
        # number of them from the beginning.
        random.shuffle(available_idx)
        quota = round(set_size * conversion_ratio)
        for idx in available_idx[:quota]:
            dataset[idx][column] = 1
        # The selected rows are exactly the prefix, so the tail slice is the
        # pool left for later variants (no per-element membership scan).
        available_idx = available_idx[quota:]
    return dataset
# Simple random sampling function | |
def simple_random(dataset):
    """Assign every row to a uniformly random variant and tally the results.

    For each row one column is picked with ``random.randint``; the pick is
    counted as a selection, and also as a reward when the cell at that
    row/column equals 1.

    Args:
        dataset: pandas DataFrame with one column per variant and one row
            per trial. Cells are read by position, so the row index labels
            do not matter.

    Returns:
        ``[numbers_of_rewards_1, all_selections]``: two lists with one
        integer per column, holding the number of rewarded picks and the
        total number of picks for that column.
    """
    from random import randint

    outcomes = dataset.values
    n_rows = len(dataset)
    # Use the dataset's own width, not a module-level global, so the
    # function works for any DataFrame passed in.
    n_variants = len(dataset.columns)

    all_selections = [0] * n_variants
    numbers_of_rewards_1 = [0] * n_variants
    for row in range(n_rows):
        choice = randint(0, n_variants - 1)
        all_selections[choice] += 1
        if outcomes[row, choice] == 1:
            numbers_of_rewards_1[choice] += 1
    return [numbers_of_rewards_1, all_selections]
# Thompson sampling function | |
def thompson(dataset):
    """Run Thompson sampling over the rows of ``dataset``.

    For each row, one value per column is drawn with
    ``random.betavariate(rewards_1 + 1, rewards_0 + 1)`` and the column with
    the largest draw is selected (the first column wins if no draw exceeds
    0). The cell at that row/column is then recorded as a reward when it
    equals 1 and as a non-reward otherwise.

    Args:
        dataset: pandas DataFrame with one column per variant and one row
            per trial. Cells are read by position.

    Returns:
        ``[numbers_of_rewards_1, all_selections]``: two lists with one
        integer per column, holding the number of rewarded picks and the
        total number of picks (rewarded plus non-rewarded) for that column.
    """
    import random

    outcomes = dataset.values  # loop-invariant, so fetched once
    n_rows = len(dataset)
    n_variants = len(dataset.columns)
    numbers_of_rewards_1 = [0] * n_variants
    numbers_of_rewards_0 = [0] * n_variants

    for row in range(n_rows):
        variant = 0
        best_draw = 0
        for i in range(n_variants):
            draw = random.betavariate(
                numbers_of_rewards_1[i] + 1,
                numbers_of_rewards_0[i] + 1,
            )
            if draw > best_draw:
                best_draw = draw
                variant = i
        if outcomes[row, variant] == 1:
            numbers_of_rewards_1[variant] += 1
        else:
            numbers_of_rewards_0[variant] += 1

    # Every pick ended as either a reward or a non-reward, so the two tallies
    # add up to the number of times each column was selected.
    all_selections = [
        won + lost
        for won, lost in zip(numbers_of_rewards_1, numbers_of_rewards_0)
    ]
    return [numbers_of_rewards_1, all_selections]
# Set parameters: views and quotes per variant, and the resulting ratios
variant_names = ['baseline', 'variant1', 'variant2', 'variant3', 'variant4']
views = [595, 599, 622, 606, 578]
quotes = [32, 30, 18, 51, 38]
conversion_ratios = [
    quote_count / view_count
    for quote_count, view_count in zip(quotes, views)
]

# Recreate a 30000-row dataset and wrap it in a Pandas dataframe with headers
dataset = pd.DataFrame(
    recreate_dataset(30000, conversion_ratios),
    columns=variant_names,
)
all_quotes = dataset.sum()

# Run both algorithms on the same dataset
random_selections = simple_random(dataset)    # Simple Random Sampling
thompson_selections = thompson(dataset)       # Thompson Sampling
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment