Skip to content

Instantly share code, notes, and snippets.

View rtkilian's full-sized avatar

Rhys Kilian rtkilian

View GitHub Profile
@rtkilian
rtkilian / analysis.py
Created July 25, 2022 23:31
Scheduling an Amazon SageMaker Notebook Instance example.
# Packages
import pandas as pd
import matplotlib.pyplot as plt
import boto3
# Default parameters
# Name of the storage bucket (presumably S3, since boto3 is imported; confirm)
bucket = "rtkilian-writing"
# File name used for the PNG image
image_name = "covid_cumulative_aus_state.png"
# Read
from sktime.forecasting.model_selection import temporal_train_test_split, SingleWindowSplitter, ForecastingRandomizedSearchCV
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import make_reduction, TransformedTargetForecaster
from sktime.utils.plotting import plot_series
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from sktime.transformations.series.detrend import Deseasonalizer, Detrender
from sktime.forecasting.trend import PolynomialTrendForecaster
from xgboost import XGBRegressor
from sktime.forecasting.model_selection import temporal_train_test_split, SingleWindowSplitter, ForecastingRandomizedSearchCV
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import make_reduction
from sktime.utils.plotting import plot_series
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from xgboost import XGBRegressor
# Build the exogenous feature frame: a single 'month' column read from the
# index of y, stored on that same index.
# NOTE(review): y is not defined in this snippet; presumably a series with a
# datetime-like index created earlier; confirm.
X = pd.DataFrame(
    data={"month": y.index.month},
    index=y.index,
)
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import make_reduction
from sktime.utils.plotting import plot_series
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from xgboost import XGBRegressor
# Build the exogenous feature frame: a single 'month' column read from the
# index of y, stored on that same index.
# NOTE(review): y is not defined in this snippet; presumably a series with a
# datetime-like index created earlier; confirm.
X = pd.DataFrame(
    data={"month": y.index.month},
    index=y.index,
)
@rtkilian
rtkilian / sktime_forecast_linear_regression.py
Created July 9, 2022 23:40
Linear regression forecaster with sktime
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import make_reduction
from sktime.utils.plotting import plot_series
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression
# Split data into train and test portions, with test_size=26
# (original note: predict from 1st July 2019).
# NOTE(review): y is not defined in this snippet; confirm where it is loaded.
y_train, y_test = temporal_train_test_split(
    y,
    test_size=26,
)
from statsmodels.stats.rates import test_poisson_2indep
# Example inputs taken from Gu, Ng, Tang, Schucany 2008: Testing the Ratio of Two Poisson Rates
# Each sample is an event count paired with its exposure
count1, exposure1 = 60, 51477.5
count2, exposure2 = 30, 54308.7
# Compare the two samples with the 'etest-wald' method and unpack the
# test statistic and p-value
stat, p = test_poisson_2indep(
    count1,
    exposure1,
    count2,
    exposure2,
    method="etest-wald",
)
from scipy.stats import fisher_exact
# Example 2x2 contingency table
table = [[100, 80], [150, 20]]
# Run Fisher's exact test; for a 2x2 table SciPy returns the sample odds
# ratio as the first value, followed by the p-value
stat, p = fisher_exact(table)
# Interpretation
print(f'stat={stat:.3f}, p={p:.3f}')
from scipy.stats import chi2_contingency
# Example 2x3 contingency table
table = [[100, 80, 70], [150, 20, 80]]
# Chi-squared test of independence: unpack the test statistic, p-value,
# degrees of freedom, and the table of expected frequencies
stat, p, dof, expected = chi2_contingency(table)
# Interpretation
print(f'stat={stat:.3f}, p={p:.3f}')
from scipy.stats import kruskal
# Randomly generate the data: three samples of size 100 with scale 1, where
# only the first sample has a non-zero loc (0.25).
# NOTE(review): rng is not defined in this snippet; presumably a NumPy random
# generator created earlier; confirm against the full script.
x1, x2, x3 = (
    rng.normal(loc=shift, scale=1, size=100) for shift in (0.25, 0.00, 0.00)
)
# Kruskal-Wallis test across the three samples: test statistic and p-value
stat, p = kruskal(x1, x2, x3)
# Example of the Analysis of Variance Test
from scipy.stats import f_oneway
# Randomly generate the data: three samples of size 100 with scale 1, where
# only the first sample has a non-zero loc (0.25).
# NOTE(review): rng is not defined in this snippet; presumably a NumPy random
# generator created earlier; confirm against the full script.
group_locs = (0.25, 0.00, 0.00)
x1, x2, x3 = (rng.normal(loc=loc, scale=1, size=100) for loc in group_locs)
del group_locs  # keep the module namespace the same as the original snippet
# One-way ANOVA across the three samples: test statistic and p-value
stat, p = f_oneway(x1, x2, x3)