Skip to content

Instantly share code, notes, and snippets.

View kperry2215's full-sized avatar

Kirsten Perry kperry2215

View GitHub Profile
from alpha_vantage.timeseries import TimeSeries
import pandas as pd
import matplotlib.pyplot as plt
# Placeholder credential: replace the string below with your own Alpha Vantage API key.
alpha_vantage_api_key = "YOUR API KEY HERE"
def pull_intraday_time_series_alpha_vantage(alpha_vantage_api_key, ticker_name, data_interval = '15min'):
"""
Pull intraday time series data by stock ticker name.
Args:
def plot_results(mean_predicted_values, confidence_interval_predicted_values, time_series):
"""
This function plots actual time series data against SARIMA model-predicted values.
We include the confidence interval for the predictions.
Args:
mean_predicted_values: Series of float values. The model-predicted values.
confidence_interval_predicted_values: Pandas dataframe, containing the lower and
upper confidence intervals.
time_series: Series of float values. Actual time series values that we want to graph
Outputs:
def fit_predictions(model_fit, steps_out_to_predict, actual_values):
"""
This function predicts the SARIMA model out a certain designated number of steps,
and compares the predictions to the actual values. The root mean squared error and
the mean absolute error are calculated, comparing the predicted and actual values.
The function returns the predicted values and their respective confidence intervals.
Args:
model_fit: SARIMA model.
steps_out_to_predict: Int. Number of steps out to predict the time series.
actual_values: Series of actual time series values.
def seasonal_arima_model(time_series, order, seasonal_order, trend):
"""
Generate a seasonal ARIMA model using a set of hyperparameters. Returns the model fit, and the
associated model AIC and BIC values.
"""
try:
model = sm_api.tsa.SARIMAX(time_series,
order=order,
seasonal_order=seasonal_order,
trend = trend,
def time_series_train_test_split(time_series, train_split_fraction):
    """
    Split a time series chronologically into a training and a test set.

    The first `train_split_fraction` of the observations form the training
    set; the remaining observations form the test set. The two sets never
    overlap and together cover the whole series.

    Args:
        time_series: Sliceable sequence exposing `.shape` (e.g. a pandas
            Series or a numpy array), ordered in time.
        train_split_fraction: Float. Fraction of the observations assigned
            to the training set.
    Outputs:
        train_set: The leading `split_index` observations.
        test_set: Every observation after the training set.
    """
    split_index = int(round(time_series.shape[0] * train_split_fraction, 0))
    train_set = time_series[:split_index]
    # Slice from split_index onward: `[:-split_index]` would return the head
    # of the series (overlapping the training data) rather than the tail.
    test_set = time_series[split_index:]
    return train_set, test_set
### EXECUTE IN MAIN FUNCTION ###
def sarima_parameter_search(search_range, seasonal=(12,)):
    """
    Get all of the parameter combinations for a SARIMA model.

    Args:
        search_range: Int. Exclusive upper bound for each of p, d and q;
            every value in range(0, search_range) is tried.
        seasonal: Iterable of seasonal period lengths to combine with the
            (p, d, q) values. Defaults to a single period of 12.
    Outputs:
        pdq: List of (p, d, q) tuples.
        seasonal_pdq_combinations: List of (p, d, q, seasonal, trend) tuples,
            one per combination of the order values, seasonal periods and
            trend options 'n', 'c', 't', 'ct'.
    """
    # Imported locally so the function does not rely on a module-level import.
    import itertools

    p = d = q = range(0, search_range)
    trend = ['n', 'c', 't', 'ct']
    pdq = list(itertools.product(p, d, q))
    # itertools.product already yields tuples, so no per-element rebuild is needed.
    seasonal_pdq_combinations = list(itertools.product(p, d, q, seasonal, trend))
    return pdq, seasonal_pdq_combinations
from statsmodels.tsa.seasonal import seasonal_decompose
def decompose_time_series(series, frequency):
    """
    Decompose a time series and plot it in the console.

    Arguments:
        series: Series. Time series that we want to decompose.
        frequency: Not described in the original docstring; presumably the
            seasonal period handed to seasonal_decompose -- TODO confirm.
    Outputs:
        Decomposition plot in the console.
    """
    # NOTE(review): the function body is not visible in this excerpt.
import eia
import pandas as pd
import matplotlib.pyplot as plt
def retrieve_time_series(api, series_ID):
    """
    Return the time series dataframe, based on API and unique Series ID.

    Args:
        api: Client object exposing a `data_by_series(series=...)` method
            (presumably an `eia` API client -- TODO confirm).
        series_ID: Identifier forwarded as the `series` keyword argument.
    """
    #Retrieve Data By Series ID
    series_search = api.data_by_series(series=series_ID)
    # NOTE(review): the excerpt ends here; the dataframe construction and the
    # return statement promised by the docstring are not visible.
def mann_whitney_u_test(distribution_1, distribution_2):
    """
    Perform the Mann-Whitney U Test, comparing two different distributions.

    Args:
        distribution_1: List.
        distribution_2: List.
    Outputs:
        u_statistic: Float. U statistic for the test.
        p_value: Float.
    """
    # NOTE(review): the function body is not visible in this excerpt.
# Partition the records by salary bracket (the labels carry a leading space).
df_less_than_50k = df.loc[df['salary'] == ' <=50K']
df_greater_than_50k = df.loc[df['salary'] == ' >50K']
# Draw the age histogram for the <=$50K bracket.
generate_distribution_histogram(
    df_less_than_50k,
    'age',
    title='Age Distribution: US Population',
    x_axis_label='Age (years)',
    y_axis_label='Frequency',
    label_name='<=$50K',
)
#Plot the histogram for the distribution for data >$50K