Skip to content

Instantly share code, notes, and snippets.

@kperry2215
Created August 24, 2019 00:29
Show Gist options
  • Save kperry2215/0f2d8e7e3d5cefb7e8736f6a88329f68 to your computer and use it in GitHub Desktop.
Save kperry2215/0f2d8e7e3d5cefb7e8736f6a88329f68 to your computer and use it in GitHub Desktop.
def one_class_SVM_anomaly_detection(dataframe, columns_to_filter_by, outliers_fraction):
    """
    Detect time series anomalies using a One Class SVM algorithm.

    The selected columns are standardized, rows with a NaN in any selected
    column are left out, and a One Class SVM (RBF kernel, gamma=0.01) is
    fitted on, and then used to classify, the remaining rows.

    Arguments:
        dataframe: Pandas dataframe. Modified in place: the
            'One_Class_SVM_Anomaly' column is added (or overwritten).
        columns_to_filter_by: string, or list of strings. Name of the column(s)
            that we want to use in the One Class SVM to detect time series
            anomalies.
        outliers_fraction: float. Passed to the model as its `nu` parameter.
    Outputs:
        dataframe: the same Pandas dataframe, with a 'One_Class_SVM_Anomaly'
            column holding True (anomaly) or False (normal). Rows that could
            not be scored because a selected column was NaN hold NaN.
    """
    # A bare column name would select a 1-D Series, which the scaler rejects;
    # wrap it so a 2-D frame is always selected.
    if isinstance(columns_to_filter_by, str):
        columns_to_filter_by = [columns_to_filter_by]
    feature_frame = dataframe[columns_to_filter_by]

    # Standardize the selected columns.
    scaler = preprocessing.StandardScaler()
    # The scaled frame carries a default RangeIndex, so its labels are the
    # row positions of `dataframe` regardless of what index `dataframe` uses.
    scaled_frame = pd.DataFrame(scaler.fit_transform(feature_frame))

    # The SVM cannot be trained on NaN's, so only complete rows are used.
    complete_rows = scaled_frame.dropna()

    # Train the One Class SVM
    model = svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.01)
    model.fit(complete_rows)

    # Label each prediction with the position of the row it was computed from.
    # Without the explicit index the predictions would be numbered 0..m-1 and
    # drift onto the wrong rows as soon as any row had been dropped.
    predictions = pd.Series(model.predict(complete_rows), index=complete_rows.index)
    # predict() returns 1 for inliers and -1 for outliers; reindexing restores
    # the full length, leaving NaN for the rows that were not scored.
    anomaly_flags = predictions.map({1: False, -1: True}).reindex(scaled_frame.index)

    # Create a column for the anomaly. Assigning the raw values is positional,
    # so it is correct for any index on `dataframe` (dates, gaps, duplicates).
    one_class_svm_anomaly_column = 'One_Class_SVM_Anomaly'
    dataframe[one_class_svm_anomaly_column] = anomaly_flags.values
    return dataframe
## EXECUTE IN MAIN BLOCK
# APPLY ONE CLASS SVM ALGORITHM TO DETECT ANOMALIES

# Names of the derived feature columns added to gasoline_price_df below.
rolling_avg_col = 'Gasoline_Price_Rolling_Average_6_pt'
diff_from_avg_col = 'Gasoline_Price_Diff_From_Rolling_Avg'

# Feature 1: centered 6-value rolling average of the gasoline price.
gasoline_price_df[rolling_avg_col] = (
    gasoline_price_df['Gasoline_Price'].rolling(window=6, center=True).mean()
)

# Feature 2: how far each price sits from that rolling average.
gasoline_price_df[diff_from_avg_col] = (
    gasoline_price_df['Gasoline_Price'] - gasoline_price_df[rolling_avg_col]
)

# Run the One Class SVM on the deviation and the raw price, with
# outliers_fraction set to 0.1.
gasoline_price_df = one_class_SVM_anomaly_detection(
    dataframe=gasoline_price_df,
    columns_to_filter_by=[diff_from_avg_col, 'Gasoline_Price'],
    outliers_fraction=.1,
)

# Re-plot the time series, color-coded by the anomaly column.
scatterplot_with_color_coding(
    gasoline_price_df['Date'],
    gasoline_price_df['Gasoline_Price'],
    gasoline_price_df['One_Class_SVM_Anomaly'],
    'Date',
    'Gasoline Price (Dollars Per Gallon)',
    'Gasoline Prices, Color-Coded on One Class SVM Anomalies',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment