import numpy as np
import matplotlib.pyplot as plt
import statsmodels.stats.api as sms

# Required sample size per group as a function of the expected lift.
baseline = 0.1
samplesize_list = []
# Absolute lifts from 0.005 to 0.029 in 0.001 steps,
# i.e. 5%-29% relative to the 0.1 baseline in 1% increments.
deltas = np.arange(0.005, 0.03, 0.001)
for delta in deltas:
    prob2 = baseline + delta
    effect_size = sms.proportion_effectsize(baseline, prob2)
    sample_size = sms.NormalIndPower().solve_power(
        effect_size=effect_size, power=0.8, alpha=0.05, ratio=1)
    samplesize_list.append(sample_size)

plt.plot(deltas, samplesize_list)
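# For a single planned lift, the same statsmodels calls give the per-group
# sample size directly. A minimal sketch (illustrative numbers, assuming the
# same 0.1 baseline and a 0.02 absolute lift):
effect_size_single = sms.proportion_effectsize(0.1, 0.12)
n_per_group = sms.NormalIndPower().solve_power(
    effect_size=effect_size_single, power=0.8, alpha=0.05, ratio=1)
print(round(n_per_group))  # required participants in each arm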
import scipy.stats as stats

# One-way ANOVA: test whether the five state samples (CA, NY, TX, VA, IL)
# share a common mean.
CA=[364, 245, 284, 172, 198, 239, 259, 168, 188, 256, 400, 329, 198, 209, 412, 358, 593, 261, 209, 245, 329, 136, 358]
NY=[564, 345, 484, 172, 298, 259, 219, 198, 234, 356, 127, 427, 298, 229, 171, 600, 172, 280, 209, 245, 188, 256, 400]
TX=[364, 245, 284, 172, 198, 239, 259, 168, 188, 256, 400, 329, 198, 209, 412, 358, 593, 261, 239, 245, 329, 136, 358]
VA=[356, 127, 427, 298, 229, 171, 600, 172, 280, 364, 245, 284, 172, 198, 239, 259, 168, 188, 256, 400, 329]
IL=[364, 245, 284, 172, 198, 239, 259, 168, 188, 256, 400, 329, 198, 209, 412, 358, 284, 172, 198, 239, 259, 168]
stats.f_oneway(CA, NY, TX, VA, IL)
#Output:
#F_onewayResult(statistic=0.3838385795048716, pvalue=0.8197813592833273)
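# The p-value (~0.82) is far above a conventional alpha of 0.05, so the test
# fails to reject the null hypothesis that the five state means are equal.
# A minimal decision check on the same data:
f_stat, p_value = stats.f_oneway(CA, NY, TX, VA, IL)
alpha = 0.05
print('Reject H0' if p_value < alpha
      else 'Fail to reject H0: no evidence the state means differ')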
import numpy as np
import matplotlib.pyplot as plt

# Simulated data: per-record scores in test are ~10% higher than in control,
# and control has 5x as many records as test.
lift = 1.1
test = np.random.binomial(100, p=0.2 * lift, size=10000) * 1.0
ctrl = np.random.binomial(100, p=0.2, size=50000) * 1.0

bins = np.linspace(0, 40, 20)
plt.hist(ctrl, bins=bins, label='Control')
plt.hist(test, bins=bins, label='Test', color='orange')
plt.legend()
import bootstrapped.bootstrap as bs
import bootstrapped.compare_functions as bs_compare
import bootstrapped.stats_functions as bs_stats
# Run an A/B comparison that ignores the lengths of the two series:
# compare the 'typical' (mean) value per record, expressed as the
# percent change of test over control.
print(bs_compare.percent_change(test.mean(), ctrl.mean()))
print(bs.bootstrap_ab(test, ctrl, bs_stats.mean, bs_compare.percent_change))
print(len(test))
print(len(ctrl))
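# The same library can also bootstrap a confidence interval for each group on
# its own. A minimal sketch using the same arrays (bs.bootstrap is the
# single-sample entry point of the bootstrapped package):
print(bs.bootstrap(test, stat_func=bs_stats.mean))
print(bs.bootstrap(ctrl, stat_func=bs_stats.mean))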
from keras.models import Sequential
from keras.layers import Dense

def model_dnn(look_back):
    # Simple feed-forward network: look_back lagged values in, one forecast out.
    model = Sequential()
    model.add(Dense(units=32, input_dim=look_back, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam',
                  metrics=['mse', 'mae'])
    return model
import numpy as np

def convert2matrix(data_arr, look_back):
    # Turn a 1-D series into supervised pairs: X holds look_back consecutive
    # values, Y holds the value that follows them.
    X, Y = [], []
    for i in range(len(data_arr) - look_back):
        d = i + look_back
        X.append(data_arr[i:d, ])
        Y.append(data_arr[d, ])
    return np.array(X), np.array(Y)
import matplotlib.pyplot as plt

def model_loss(history):
    # Plot training vs. validation loss across epochs.
    plt.figure(figsize=(8, 4))
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Test Loss')
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epochs')
    plt.legend(loc='upper right')
    plt.show()
import seaborn as sns

def prediction_plot(testY, test_predict):
    # Overlay actual vs. predicted values over the test period.
    len_prediction = [x for x in range(len(testY))]
    plt.figure(figsize=(8, 4))
    plt.plot(len_prediction, testY, marker='.', label="actual")
    plt.plot(len_prediction, test_predict, 'r', label="prediction")
    plt.tight_layout()
    sns.despine(top=True)
    plt.subplots_adjust(left=0.07)
    plt.ylabel('Ads Daily Spend', size=15)
    plt.xlabel('Time step', size=15)
    plt.legend(fontsize=13)
    plt.show()
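# Hypothetical end-to-end usage of the helpers above, assuming a 1-D NumPy
# array of daily ad spend (ads_spend), a train/test split index (split), and a
# 7-day look-back window; these names are illustrative, not from the gist.
look_back = 7
train, test_series = ads_spend[:split], ads_spend[split:]
trainX, trainY = convert2matrix(train, look_back)
testX, testY = convert2matrix(test_series, look_back)
model = model_dnn(look_back)
history = model.fit(trainX, trainY, epochs=100, batch_size=30,
                    validation_data=(testX, testY), verbose=0)
model_loss(history)
test_predict = model.predict(testX)
prediction_plot(testY, test_predict)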
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from keras.callbacks import EarlyStopping

def model_rnn(look_back):
    # Simple RNN: the look_back window is fed as features of a single timestep.
    model = Sequential()
    model.add(SimpleRNN(units=32, input_shape=(1, look_back), activation="relu"))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam',
                  metrics=['mse', 'mae'])
    return model
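# SimpleRNN (and the LSTM below) expect 3-D input of shape
# (samples, timesteps, features); with input_shape=(1, look_back) the window
# is treated as features of a single timestep, so the 2-D matrices from
# convert2matrix need reshaping first. A minimal sketch reusing the
# hypothetical trainX/testX from above:
trainX_rnn = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX_rnn = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
rnn = model_rnn(look_back)
history = rnn.fit(trainX_rnn, trainY, epochs=100, batch_size=30,
                  validation_data=(testX_rnn, testY),
                  callbacks=[EarlyStopping(monitor='val_loss', patience=10)],
                  verbose=0)
model_loss(history)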
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping

def model_lstm(look_back):
    # Single-layer LSTM over the same (1, look_back) input shape as the RNN.
    model = Sequential()
    model.add(LSTM(100, input_shape=(1, look_back), activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam',
                  metrics=['mse', 'mae'])
    return model
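# The LSTM takes the same reshaped 3-D arrays as the RNN sketch above;
# a hypothetical fit-and-plot pass:
lstm = model_lstm(look_back)
lstm.fit(trainX_rnn, trainY, epochs=100, batch_size=30,
         validation_data=(testX_rnn, testY),
         callbacks=[EarlyStopping(monitor='val_loss', patience=10)],
         verbose=0)
test_predict = lstm.predict(testX_rnn)
prediction_plot(testY, test_predict)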