Created
August 24, 2019 00:28
-
-
Save kperry2215/ef1ffa5a64488f0eafb9d04bc84cf657 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sesd_anomaly_detection(dataframe,
                           column_name,
                           desired_frequency,
                           max_anomalies,
                           alpha_level):
    """
    Detect time series anomalies in one dataframe column using the S-ESD algorithm.

    Arguments:
        dataframe: Pandas dataframe. It is modified in place (the anomaly column
            is added to it) and the same object is returned.
        column_name: string. Name of the column that we want to detect anomalies in
        desired_frequency: Integer. Time frequency of the series, passed to
            sesd.seasonal_esd as `seasonality`. If we want to detect a yearly
            trend, we'd set the value equal to 365.
        max_anomalies: Integer. Max number of anomalies to look for in the time
            series sequence.
        alpha_level: The significance level, passed to sesd.seasonal_esd as `alpha`.
    Outputs:
        df: Pandas dataframe with a boolean column named
            '<column_name>_SESD_Anomaly' (True if anomalous, False if normal)
    """
    series = np.array(dataframe[column_name])
    # Implement SESD algorithm on the time series
    outliers_indices = sesd.seasonal_esd(series,
                                         hybrid=False,
                                         seasonality=desired_frequency,
                                         max_anomalies=max_anomalies,
                                         alpha=alpha_level)
    # Name of the column that holds the SESD anomaly flags
    sesd_anomaly_column = column_name + '_SESD_Anomaly'
    # The series handed to seasonal_esd is a plain numpy array with no index
    # labels, so the returned indices can only refer to positions in that array.
    # Build the True/False mask by position instead of comparing against
    # dataframe.index, which would mislabel rows whenever the dataframe does not
    # use the default 0..n-1 index (e.g. a DatetimeIndex or a filtered frame).
    is_anomaly = np.isin(np.arange(len(series)), outliers_indices)
    # Assigning a numpy array writes the values row by row, in order
    dataframe[sesd_anomaly_column] = is_anomaly
    return dataframe
## EXECUTE IN MAIN BLOCK
# Run S-ESD anomaly detection on the gasoline price series; the result gains a
# 'Gasoline_Price_SESD_Anomaly' True/False column.
# NOTE(review): alpha_level is documented as a significance level, which is
# conventionally a value between 0 and 1 -- confirm that 10 is intended.
gasoline_price_df = sesd_anomaly_detection(
    dataframe=gasoline_price_df,
    column_name='Gasoline_Price',
    desired_frequency=365,
    max_anomalies=20,
    alpha_level=10,
)

# Plot the series again, this time colored by the anomaly flag
scatterplot_with_color_coding(
    gasoline_price_df['Date'],
    gasoline_price_df['Gasoline_Price'],
    gasoline_price_df['Gasoline_Price_SESD_Anomaly'],
    'Date',
    'Gasoline Price (Dollars Per Gallon)',
    'Gasoline Prices, Color-Coded on SESD Anomalies',
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment