Created
August 24, 2019 00:23
-
-
Save kperry2215/b27fa6b95c820d00d1efec7f6f1b2d55 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def low_pass_filter_anomaly_detection(df,
                                      column_name,
                                      number_of_stdevs_away_from_mean,
                                      window=60):
    """
    Implement a low-pass filter (centered rolling average) to detect anomalies in a time
    series, and save the filter outputs (True/False) to a new column in the dataframe.

    A data point is flagged when its absolute distance from the centered rolling average
    is strictly greater than number_of_stdevs_away_from_mean times the centered rolling
    standard deviation computed over the same window.

    Arguments:
        df: Pandas dataframe. It is modified in place: the columns
            column_name+'_Rolling_Average', column_name+'_Rolling_StDev' and
            column_name+'_Low_Pass_Filter_Anomaly' are added (or overwritten).
        column_name: string. Name of the column that we want to detect anomalies in
        number_of_stdevs_away_from_mean: float. Number of rolling standard deviations
            away from the rolling average that we want to flag anomalies at. For example,
            if number_of_stdevs_away_from_mean=2, then all data points more than 2 rolling
            standard deviations away from the rolling average are flagged as anomalies.
        window: int. Number of rows in the centered rolling window. Defaults to 60, the
            value that was previously hard-coded (60 days when the data has one row
            per day).
    Outputs:
        df: Pandas dataframe. The same dataframe object that was passed in, now containing
            the column for low pass filter anomalies (True/False)
    """
    average_column = column_name + '_Rolling_Average'
    stdev_column = column_name + '_Rolling_StDev'
    anomaly_column = column_name + '_Low_Pass_Filter_Anomaly'

    # One centered rolling window is shared by both statistics so they always describe
    # exactly the same rows.
    rolling_window = df[column_name].rolling(window=window, center=True)
    # Rolling average over `window` rows
    df[average_column] = rolling_window.mean()
    # Rolling standard deviation over `window` rows
    df[stdev_column] = rolling_window.std()

    # Detect anomalies by determining how far away from the rolling average (in terms of
    # rolling standard deviation) each data point is. Rows near the start/end of the
    # series do not have a full window, so their statistics are NaN and the comparison
    # evaluates to False: those rows are never flagged.
    distance_from_average = (df[column_name] - df[average_column]).abs()
    allowed_distance = number_of_stdevs_away_from_mean * df[stdev_column]
    df[anomaly_column] = distance_from_average > allowed_distance
    return df
## Execute in main block
# Apply the low-pass filter (rolling average with a 3-standard-deviation threshold)
# to flag anomalies in the gasoline price series.
gasoline_price_df = low_pass_filter_anomaly_detection(
    df=gasoline_price_df,
    column_name='Gasoline_Price',
    number_of_stdevs_away_from_mean=3,
)

# Plot the time series again, this time color-coded by the anomaly column.
scatterplot_with_color_coding(
    gasoline_price_df['Date'],
    gasoline_price_df['Gasoline_Price'],
    gasoline_price_df['Gasoline_Price_Low_Pass_Filter_Anomaly'],
    'Date',
    'Gasoline Price (Dollars Per Gallon)',
    'Gasoline Prices, Color-Coded on Low-Pass Filter Anomalies',
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment