Rhys Kilian rtkilian

Data Analytics Lead @ L'Oreal | Formerly Data Scientist @ Movember and Management Consultant @ Kearney | https://www.linkedin.com/in/rtkilian

rtkilian / analysis.py

Created July 25, 2022 23:31

Scheduling an Amazon SageMaker Notebook Instance example.

	# Packages
	import pandas as pd
	import matplotlib.pyplot as plt
	import boto3

	# Default parameters
	# NOTE(review): `bucket` is presumably the name of an S3 bucket (boto3 is
	# imported above) — confirm against the rest of the script.
	bucket = 'rtkilian-writing'
	# NOTE(review): `image_name` is presumably the file name of the chart this
	# script produces (matplotlib is imported above); its use is not visible
	# in this excerpt — confirm.
	image_name = 'covid_cumulative_aus_state.png'

	# Read

rtkilian / sktime_forecast_xgboost_3.py

Created July 11, 2022 22:56

	from sktime.forecasting.model_selection import temporal_train_test_split, SingleWindowSplitter, ForecastingRandomizedSearchCV
	from sktime.forecasting.base import ForecastingHorizon
	from sktime.forecasting.compose import make_reduction, TransformedTargetForecaster
	from sktime.utils.plotting import plot_series
	from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
	from sktime.transformations.series.detrend import Deseasonalizer, Detrender
	from sktime.forecasting.trend import PolynomialTrendForecaster

	from xgboost import XGBRegressor

rtkilian / sktime_forecast_xgboost_2.py

Created July 11, 2022 22:44

	from sktime.forecasting.model_selection import temporal_train_test_split, SingleWindowSplitter, ForecastingRandomizedSearchCV
	from sktime.forecasting.base import ForecastingHorizon
	from sktime.forecasting.compose import make_reduction
	from sktime.utils.plotting import plot_series
	from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

	from xgboost import XGBRegressor

	# Create an exogenous dataframe indicating the month
	# X has a single 'month' column read from y's index and reuses y's index.
	# NOTE(review): `pd` and `y` are not defined in this excerpt; y.index must
	# expose a `.month` attribute (presumably a DatetimeIndex or PeriodIndex) —
	# confirm.
	X = pd.DataFrame({'month': y.index.month}, index=y.index)

rtkilian / sktime_forecast_xgboost_1.py

Last active March 19, 2024 15:41

	from sktime.forecasting.model_selection import temporal_train_test_split
	from sktime.forecasting.base import ForecastingHorizon
	from sktime.forecasting.compose import make_reduction
	from sktime.utils.plotting import plot_series
	from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

	from xgboost import XGBRegressor

	# Create an exogenous dataframe indicating the month
	# X has a single 'month' column read from y's index and reuses y's index.
	# NOTE(review): `pd` and `y` are not defined in this excerpt; y.index must
	# expose a `.month` attribute (presumably a DatetimeIndex or PeriodIndex) —
	# confirm.
	X = pd.DataFrame({'month': y.index.month}, index=y.index)

rtkilian / sktime_forecast_linear_regression.py

Created July 9, 2022 23:40

Linear regression forecaster with sktime

	from sktime.forecasting.model_selection import temporal_train_test_split
	from sktime.forecasting.base import ForecastingHorizon
	from sktime.forecasting.compose import make_reduction
	from sktime.utils.plotting import plot_series
	from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

	from sklearn.linear_model import LinearRegression

	# Split data
	# NOTE(review): `y` is not defined in this excerpt. test_size=26 presumably
	# holds out the final 26 observations of y as the test set (see sktime's
	# temporal_train_test_split) — confirm.
	y_train, y_test = temporal_train_test_split(y, test_size=26) # Predict from 1st July 2019

rtkilian / poisson_e_test.py

Created May 12, 2022 21:45

	from statsmodels.stats.rates import test_poisson_2indep

	# Example inputs taken from Gu, Ng, Tang, Schucany 2008: Testing the Ratio of Two Poisson Rates
	# Each sample is an event count paired with its exposure.
	count1, exposure1 = 60, 51477.5
	count2, exposure2 = 30, 54308.7

	# Compare the two samples with the 'etest-wald' method and unpack the
	# test statistic and the p-value
	stat, p = test_poisson_2indep(
		count1,
		exposure1,
		count2,
		exposure2,
		method='etest-wald',
	)

rtkilian / fishers_exact_test.py

Created May 12, 2022 21:44

	from scipy.stats import fisher_exact

	# 2x2 contingency table of example counts, one inner list per row
	table = [
		[100, 80],
		[150, 20],
	]

	# Run Fisher's exact test and unpack the statistic and the p-value
	stat, p = fisher_exact(table)

	# Interpretation: report both values to three decimal places
	print('stat=%.3f, p=%.3f' % (stat, p))

rtkilian / chi_squared_test.py

Created May 12, 2022 21:42

	from scipy.stats import chi2_contingency

	# 2x3 contingency table of example counts, one inner list per row
	table = [
		[100, 80, 70],
		[150, 20, 80],
	]

	# Run the chi-squared contingency test and unpack the statistic, the
	# p-value, the degrees of freedom and the expected frequencies
	stat, p, dof, expected = chi2_contingency(table)

	# Interpretation: report the statistic and p-value to three decimal places
	print('stat=%.3f, p=%.3f' % (stat, p))

rtkilian / kruskal_wallis_h_test.py

Created May 12, 2022 21:40

	from scipy.stats import kruskal

	# Randomly generate the data: three samples of 100 normal draws each, all
	# with scale 1; only the first sample's mean is shifted (0.25 vs 0.00).
	# The samples are drawn in order x1, x2, x3.
	# NOTE(review): `rng` is not defined in this excerpt — presumably a NumPy
	# random Generator created elsewhere; confirm.
	x1, x2, x3 = (
		rng.normal(loc=mean, scale=1, size=100)
		for mean in (0.25, 0.00, 0.00)
	)

	# Kruskal-Wallis H-test across the three samples: statistic and p-value
	stat, p = kruskal(x1, x2, x3)

rtkilian / one_way_anova_test.py

Created May 12, 2022 21:37

	# Example of the Analysis of Variance Test
	from scipy.stats import f_oneway

	# Randomly generate the data: three samples of 100 normal draws each, all
	# with scale 1; only the first sample's mean is shifted (0.25 vs 0.00).
	# The samples are drawn in order x1, x2, x3.
	# NOTE(review): `rng` is not defined in this excerpt — presumably a NumPy
	# random Generator created elsewhere; confirm.
	x1, x2, x3 = (
		rng.normal(loc=mean, scale=1, size=100)
		for mean in (0.25, 0.00, 0.00)
	)

	# One-way ANOVA across the three samples: statistic and p-value
	stat, p = f_oneway(x1, x2, x3)