Created
August 24, 2019 00:29
-
-
Save kperry2215/0f2d8e7e3d5cefb7e8736f6a88329f68 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def one_class_SVM_anomaly_detection(dataframe, columns_to_filter_by, outliers_fraction,
                                    kernel="rbf", gamma=0.01):
    """
    In this definition, time series anomalies are detected
    using a One Class SVM algorithm.
    Arguments:
        dataframe: Pandas dataframe. It is modified in place (the anomaly
            column is added to it) and also returned.
        columns_to_filter_by: string, or list of strings. Name of the column(s) that
            we want to use in the One Class SVM to detect time series anomalies
        outliers_fraction: float. Fraction of outliers allowed in the sequence
            (passed to the One Class SVM as its `nu` parameter).
        kernel: string. Kernel passed to the One Class SVM. Defaults to "rbf".
        gamma: Kernel coefficient passed to the One Class SVM. Defaults to 0.01.
    Outputs:
        dataframe: Pandas dataframe with column 'One_Class_SVM_Anomaly' for detected
            One Class SVM anomalies (True/False). Rows that have a NaN in any of
            the selected columns are not scored and are left as NaN.
    Raises:
        ValueError: if no row has a value in every selected column.
    """
    # A bare column name would select a 1-D Series, which the scaler rejects;
    # wrap it so the selection is always a 2-D dataframe
    if isinstance(columns_to_filter_by, str):
        columns_to_filter_by = [columns_to_filter_by]
    # Subset the dataframe by desired columns
    dataframe_filtered_columns = dataframe[columns_to_filter_by]
    # Standardize the data using the fit_transform() function
    standard_scaler = preprocessing.StandardScaler()
    np_scaled = standard_scaler.fit_transform(dataframe_filtered_columns)
    # Label the scaled rows by their position in the input so that each
    # prediction can be traced back to its row after NaN rows are dropped,
    # whatever index the caller's dataframe uses
    row_positions = pd.RangeIndex(len(dataframe))
    scaled_dataframe = pd.DataFrame(np_scaled, index=row_positions)
    # Remove any rows containing NaN's (the SVM cannot be fitted on them)
    scaled_dataframe = scaled_dataframe.dropna()
    if scaled_dataframe.empty:
        raise ValueError(
            "No rows without NaN values in the selected columns; "
            "cannot fit the One Class SVM."
        )
    # Train the One Class SVM
    model = svm.OneClassSVM(nu=outliers_fraction, kernel=kernel, gamma=gamma)
    model.fit(scaled_dataframe)
    # predict() labels outliers as -1; attach the surviving row positions
    # so the flags stay lined up with the rows they were computed from
    anomaly_flags = pd.Series(model.predict(scaled_dataframe) == -1,
                              index=scaled_dataframe.index)
    # Create a column for the anomaly. Dropped rows become NaN via reindex,
    # and the positional assignment avoids any dependence on the caller's index
    one_class_svm_anomaly_column = 'One_Class_SVM_Anomaly'
    dataframe[one_class_svm_anomaly_column] = anomaly_flags.reindex(row_positions).to_numpy()
    return dataframe
## EXECUTE IN MAIN BLOCK
# Detect anomalies in the gasoline price series with the One Class SVM.

# Names of the columns involved in the clustering
price_column = 'Gasoline_Price'
rolling_average_column = 'Gasoline_Price_Rolling_Average_6_pt'
diff_from_average_column = 'Gasoline_Price_Diff_From_Rolling_Avg'

# Feature 1: centered 6-value rolling average of the gasoline price
six_point_window = gasoline_price_df[price_column].rolling(window=6, center=True)
gasoline_price_df[rolling_average_column] = six_point_window.mean()

# Feature 2: how far each price sits from its rolling average
gasoline_price_df[diff_from_average_column] = (
    gasoline_price_df[price_column] - gasoline_price_df[rolling_average_column]
)

# Run the One Class SVM on the time series, allowing 10% outliers
svm_feature_columns = [diff_from_average_column, price_column]
gasoline_price_df = one_class_SVM_anomaly_detection(
    gasoline_price_df,
    svm_feature_columns,
    .1,
)

# Re-plot the time series, color-coded by the anomaly column
scatterplot_with_color_coding(
    gasoline_price_df['Date'],
    gasoline_price_df[price_column],
    gasoline_price_df['One_Class_SVM_Anomaly'],
    'Date',
    'Gasoline Price (Dollars Per Gallon)',
    'Gasoline Prices, Color-Coded on One Class SVM Anomalies',
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment