This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def confidence_intervals(data, confidence_level=0.99):
    """
    Print a two-sided percentile interval for the first column of `data`.

    Parameters
    ----------
    data : pd.DataFrame
        Only the first column (``data.iloc[:, 0]``) is used.
    confidence_level : float, default 0.99
        Fraction of the distribution the interval should cover. The
        remaining mass is split equally between the two tails.

    Returns
    -------
    None
        The interval is reported on stdout only.
    """
    # Split the excluded probability mass evenly between both tails.
    low_end = (1 - confidence_level) / 2
    high_end = 1 - low_end

    values = data.iloc[:, 0]
    # Bounds are rounded to 4 decimal places for display.
    bottom_percentile = np.round(values.quantile(low_end), 4)
    top_percentile = np.round(values.quantile(high_end), 4)

    print('The {}% confidence interval is [{}, {}]'.format(
        confidence_level * 100, bottom_percentile, top_percentile))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def bootstrap(data, col, st_dev=False, rep=1000): | |
| if not st_dev: | |
| means = [] | |
| n = len(data) | |
| for i in range(rep): | |
| sample = data.sample(n=n, replace=True) | |
| mean = sample[col].mean() | |
| means.append(mean) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def permutation_test(control, treatment, alpha, r=1000): | |
| """ | |
| Runs a permutation test to check whether the difference in means | |
| between control and treatment is statistically significant. | |
| Parameters: | |
| control: pd.Series | |
| A pandas series with all the control (A) observations | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from __future__ import division | |
| import numpy as np | |
| import pandas as pd | |
| import random | |
| from datetime import date | |
| def expected(A, B): | |
| """ | |
| Calculate expected score of team A in a match against team B |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def plot_learning_curves(estimator, X_train, y_train, X_val, y_val, | |
| suptitle='', title='', xlabel='', ylabel=''): | |
| """ | |
| Plots learning curves for a given estimator. | |
| Parameters | |
| ---------- | |
| estimator : sklearn estimator | |
| X_train : pd.DataFrame |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert X and y to NumPy arrays. `.to_numpy()` replaces `.as_matrix()`,
# which was deprecated in pandas 0.23 and removed in pandas 1.0.
X = X.to_numpy()
y = y.to_numpy()

# Create stratified k-fold split generators for the inner and outer loops.
# Both use 10 shuffled folds and the same fixed seed (12).
outer_kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=12)
inner_kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=12)

# Set up hyperparameter tuning: seven candidate values, 1e-4 through 1e2,
# one per power of ten.
Cs = 10.0 ** np.arange(-4, 3)