Created
August 24, 2019 00:25
-
-
Save kperry2215/c8d0ee3f9ec821a64be0c13dd1c89c09 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def isolation_forest_anomaly_detection(df,
                                       column_name,
                                       outliers_fraction):
    """
    Flag anomalies in one column of a dataframe using an Isolation Forest.

    The column is standardized, an Isolation Forest is fitted on the scaled
    values, and the same values are then labelled by the fitted model.

    Arguments:
        df: Pandas dataframe. It is modified in place (one column is added).
        column_name: string. Name of the column that we want to detect anomalies in
        outliers_fraction: float. Expected proportion of outliers in the column,
            given as a fraction (e.g. 0.04 for 4%); it is passed to
            IsolationForest as `contamination`.
    Outputs:
        df: the same Pandas dataframe, with an added True/False column named
            '<column_name>_Isolation_Forest_Anomaly' (True = detected anomaly)
    """
    # Standardize the column that we want to flag for anomalies
    scaler = preprocessing.StandardScaler()
    scaled_values = scaler.fit_transform(df[[column_name]])
    # Train the isolation forest. The former `behaviour='new'` argument is not
    # passed: it was deprecated in scikit-learn 0.22 and removed in 0.24, where
    # supplying it raises a TypeError.
    model = IsolationForest(contamination=outliers_fraction)
    model.fit(scaled_values)
    # predict() labels inliers as 1 and outliers as -1; store outliers as True
    isolation_forest_anomaly_column = column_name + '_Isolation_Forest_Anomaly'
    df[isolation_forest_anomaly_column] = model.predict(scaled_values) == -1
    return df
## EXECUTE IN MAIN BLOCK
# Apply the Isolation Forest to flag anomalous gasoline prices; the
# contamination (expected fraction of outliers) is set to 4%.
gasoline_price_df = isolation_forest_anomaly_detection(
    df=gasoline_price_df,
    column_name='Gasoline_Price',
    outliers_fraction=.04,
)
# Re-plot the time series, color-coded on the newly added anomaly column
scatterplot_with_color_coding(
    gasoline_price_df['Date'],
    gasoline_price_df['Gasoline_Price'],
    gasoline_price_df['Gasoline_Price_Isolation_Forest_Anomaly'],
    'Date',
    'Gasoline Price (Dollars Per Gallon)',
    'Gasoline Prices, Color-Coded on Isolation Forest Anomalies',
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment