Skip to content

Instantly share code, notes, and snippets.

View kperry2215's full-sized avatar

Kirsten Perry kperry2215

View GitHub Profile
def decompose_time_series(series, desired_frequency):
    """
    Perform STL decomposition on the time series.

    Arguments:
        series: Pandas series. Time series sequence that we wish to decompose.
        desired_frequency: Integer. Time frequency of the series. If we want
            to detect a yearly trend, we'd set the value equal to 365.
    Outputs:
        Plot of time series STL decomposition.

    NOTE(review): only the signature and docstring are present in this
    excerpt. The decomposition/plotting code still has to be restored here;
    until then calling this function does nothing.
    """
def isolation_forest_anomaly_detection(df,
                                       column_name,
                                       outliers_fraction):
    """
    In this definition, time series anomalies are detected using an Isolation Forest algorithm.

    Arguments:
        df: Pandas dataframe
        column_name: string. Name of the column that we want to detect anomalies in
        outliers_fraction: float. Percentage of outliers allowed in the sequence.
    Outputs:
        NOTE(review): the output description and the function body are cut
        off in this excerpt; restore them before relying on this function.
        Until then calling it does nothing.
    """
def low_pass_filter_anomaly_detection(df,
                                      column_name,
                                      number_of_stdevs_away_from_mean):
    """
    Implement a low-pass filter to detect anomalies in a time series, and save the filter outputs
    (True/False) to a new column in the dataframe.

    Arguments:
        df: Pandas dataframe
        column_name: string. Name of the column that we want to detect anomalies in
        number_of_stdevs_away_from_mean: float. Number of standard deviations away from
            the mean (the original description is cut off here; presumably
            this is the threshold for flagging a point - confirm).

    NOTE(review): the function body is not present in this excerpt; restore
    it before relying on this function. Until then calling it does nothing.
    """
# Hand-picked anomalous data points for the detection algorithm to find.
# Keys are presumably positions in the series and values the readings to
# inject there - confirm against the code that applies this mapping.
anomaly_dictionary = {
    80: 3.1,
    200: 3,
    333: 1,
    600: 2.6,
    710: 2.1,
    890: 2.3,
    1100: 1,
    1211: 2.6,
    1309: 2.3,
}
import pandas as pd
import matplotlib.pyplot as plt
import eia
def retrieve_time_series(api, series_ID):
    """
    Return the time series dataframe, based on API and unique Series ID

    Arguments:
        api: API that we're connected to
        series_ID: string. Name of the series that we want to pull from the EIA API

    NOTE(review): the docstring was unterminated and the function body is
    not present in this excerpt; restore the body before relying on this
    definition. Until then calling it returns nothing.
    """
# Connect to the EIA API; substitute your own key for the placeholder.
api_key = "YOUR API KEY HERE"
api = eia.API(api_key)

# Series ID of the oil WTI price data.
series_ID = "PET.RWTC.D"

# Fetch the series by its ID, then load the result into a pandas dataframe.
series_search = api.data_by_series(series=series_ID)
price_df = pd.DataFrame(series_search)
def retrieve_time_series(api, series_ID):
    """
    Return the time series dataframe, based on API and unique Series ID

    Arguments:
        api: API that we're connected to; it is queried through
            api.data_by_series(series=...).
        series_ID: string. Name of the series that we want to pull from the EIA API
    Outputs:
        df: Pandas dataframe built from the retrieved series.
    """
    # Retrieve data by series ID
    series_search = api.data_by_series(series=series_ID)
    # Create a pandas dataframe from the retrieved time series and hand it
    # back to the caller, as the docstring promises.
    df = pd.DataFrame(series_search)
    return df
def retrieve_time_series(api, series_ID):
    """
    Return the time series dataframe, based on API and unique Series ID

    Arguments:
        api: API that we're connected to; it is queried through
            api.data_by_series(series=...).
        series_ID: string. Name of the series that we want to pull from the EIA API
    Outputs:
        df: Pandas dataframe built from the retrieved series.
    """
    # Retrieve data by series ID
    series_search = api.data_by_series(series=series_ID)
    # Create a pandas dataframe from the retrieved time series and hand it
    # back to the caller, as the docstring promises.
    df = pd.DataFrame(series_search)
    return df
# Tabulate rf.feature_importances_ with one row per entry of feature_list,
# sorted from the highest importance to the lowest, and print the table.
feature_importances = pd.DataFrame(
    rf.feature_importances_,
    index=feature_list,
    columns=["importance"],
).sort_values("importance", ascending=False)
print(feature_importances)
# Final random-forest classifier, built with the optimized model parameters.
rf = RandomForestClassifier(
    n_estimators=1100,
    max_depth=100,
    random_state=1500,
)

# Fit the model on the training features and labels.
rf.fit(train_features, train_labels)
# Use the forest's predict method on the test data
print(confusion_matrix(test_labels,
rf.predict(test_features),
labels=['Hour 8', 'Hour 9', 'Hour 10',