This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from alpha_vantage.timeseries import TimeSeries | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
alpha_vantage_api_key = "YOUR API KEY HERE" | |
def pull_intraday_time_series_alpha_vantage(alpha_vantage_api_key, ticker_name, data_interval = '15min'): | |
""" | |
Pull intraday time series data by stock ticker name. | |
Args: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_results(mean_predicted_values, confidence_interval_predicted_values, time_series): | |
""" | |
This function plots actual time series data against SARIMA model-predicted values. | |
We include the confidence interval for the predictions. | |
Args: | |
mean_predicted_values: Series of float values. The model-predicted values. | |
confidence_interval_predicted_values: Pandas dataframe, containing the lower and | |
upper confidence intervals. | |
time_series: Series of float values. Actual time series values that we want to graph | |
Outputs: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fit_predictions(model_fit, steps_out_to_predict, actual_values): | |
""" | |
This function predicts the SARIMA model out a certain designated number of steps, | |
and compares the predictions to the actual values. The root mean squared error and | |
the mean absolute error are calculated, comparing the predicted and actual values. | |
The function returns the predicted values and their respective confidence intervals. | |
Args: | |
model_fit: SARIMA model. | |
steps_out_to_predict: Int. Number of steps out to predict the time series. | |
actual_values: Series of actual time series values. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def seasonal_arima_model(time_series, order, seasonal_order, trend): | |
""" | |
Generate a seasonal ARIMA model using a set of hyperparameters. Returns the model fit, and the | |
associated model AIC and BIC values. | |
""" | |
try: | |
model = sm_api.tsa.SARIMAX(time_series, | |
order=order, | |
seasonal_order=seasonal_order, | |
trend = trend, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def time_series_train_test_split(time_series, train_split_fraction):
    """
    Split the data into a training set and a test set, preserving time order.

    Args:
        time_series: Sliceable sequence of observations in chronological order
            (e.g. a pandas Series/DataFrame or a numpy array).
        train_split_fraction: Float. Fraction of the observations, counted from
            the start, that goes into the training set.
    Outputs:
        train_set: The leading observations of time_series.
        test_set: The remaining, trailing observations of time_series.
    """
    # Number of leading observations assigned to the training set
    split_index = int(round(len(time_series) * train_split_fraction))
    train_set = time_series[:split_index]
    # The test set is everything AFTER the training rows. Slicing with
    # [:-split_index] would return leading rows that overlap the training set
    # (and nothing at all when split_index is 0).
    test_set = time_series[split_index:]
    return train_set, test_set
### EXECUTE IN MAIN FUNCTION ### |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sarima_parameter_search(search_range, seasonal=(12,)):
    """
    Get all of the parameter combinations for a SARIMA model.

    Args:
        search_range: Int. Exclusive upper bound for p, d and q; each of them
            takes every value in 0 .. search_range - 1.
        seasonal: Iterable of seasonal period values to include in the
            combinations. Defaults to a single period of 12.
    Outputs:
        pdq: List of every (p, d, q) tuple.
        seasonal_pdq_combinations: List of every
            (p, d, q, seasonal_period, trend) tuple, where trend is one of
            'n', 'c', 't', 'ct'.
    """
    # p, d and q all share the same candidate values
    orders = range(0, search_range)
    trend = ['n', 'c', 't', 'ct']
    pdq = list(itertools.product(orders, repeat=3))
    # itertools.product already yields tuples, so no per-item rebuild is needed
    seasonal_pdq_combinations = list(
        itertools.product(orders, orders, orders, seasonal, trend)
    )
    return pdq, seasonal_pdq_combinations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from statsmodels.tsa.seasonal import seasonal_decompose | |
def decompose_time_series(series, frequency): | |
""" | |
Decompose a time series and plot it in the console | |
Arguments: | |
series: series. Time series that we want to decompose | |
Outputs: | |
Decomposition plot in the console | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import eia | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
def retrieve_time_series(api, series_ID): | |
""" | |
Return the time series dataframe, based on API and unique Series ID | |
""" | |
#Retrieve Data By Series ID | |
series_search = api.data_by_series(series=series_ID) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def mann_whitney_u_test(distribution_1, distribution_2): | |
""" | |
Perform the Mann-Whitney U Test, comparing two different distributions. | |
Args: | |
distribution_1: List. | |
distribution_2: List. | |
Outputs: | |
u_statistic: Float. U statistic for the test. | |
p_value: Float. | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the rows by salary bracket (the labels compared against start with a space)
df_less_than_50k = df.loc[df["salary"] == " <=50K"]
df_greater_than_50k = df.loc[df["salary"] == " >50K"]
# Draw the age histogram for the rows in the <=$50K bracket
generate_distribution_histogram(
    df_less_than_50k,
    "age",
    title="Age Distribution: US Population",
    x_axis_label="Age (years)",
    y_axis_label="Frequency",
    label_name="<=$50K",
)
#Plot the histogram for the distribution for data >$50K |