Skip to content

Instantly share code, notes, and snippets.

@josepsmartinez
Created February 19, 2021 20:14
Show Gist options
  • Save josepsmartinez/bf0437a92f97c94a98df9eaa4347cb32 to your computer and use it in GitHub Desktop.
Save josepsmartinez/bf0437a92f97c94a98df9eaa4347cb32 to your computer and use it in GitHub Desktop.
Anomaly Detection with OneClassSVM and Isolation Forest
import numpy as np
import pandas as pd
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
def df2np(df: pd.DataFrame, dtype=np.float64) -> np.ndarray:
    """Convert a DataFrame into a NumPy array of the requested dtype.

    Args:
        df: DataFrame whose values are exported with ``to_numpy()``.
        dtype: Element type passed to ``astype``; defaults to ``np.float64``.

    Returns:
        The result of ``df.to_numpy().astype(dtype)``.
    """
    return df.to_numpy().astype(dtype)
# Upper bound on the rows kept per class when slicing below; None keeps all.
limit_samples = None

# Fold generator and anomaly detector, both with library-default settings.
kf = KFold()
# classifier = OneClassSVM()  # alternative detector; swap in to compare
classifier = IsolationForest()

# Columns handed to the detector; every other CSV column is ignored.
feature_columns = [
    'det_width', 'det_height', 'det_ar', 'det_conf',
    'det_lm_0_x', 'det_lm_1_x', 'det_lm_2_x', 'det_lm_3_x', 'det_lm_4_x',
    'det_lm_0_y', 'det_lm_1_y', 'det_lm_2_y', 'det_lm_3_y', 'det_lm_4_y',
]

df = pd.read_csv("features.csv")

# Keep only rows that have a det_conf value.
df = df.loc[df['det_conf'].notnull()]

# Split the rows by label and narrow to the feature columns in one step.
ok_df = df.loc[df['label'] == "ok", feature_columns]
nok_df = df.loc[df['label'] == "nok", feature_columns]

# Numeric matrices for each class, truncated to limit_samples rows.
X_ok = df2np(ok_df)[:limit_samples]
X_nok = df2np(nok_df)[:limit_samples]
# Cross-validation: fit the detector on "ok" samples only, then measure
#   * how many held-out "ok" samples are predicted as inliers (+1), and
#   * how many "nok" samples are predicted as outliers (-1).
ok_accs = []
nok_accs = []
for train_index, test_index in kf.split(X_ok):
    X_train, X_test = X_ok[train_index], X_ok[test_index]
    classifier.fit(X_train)

    # Held-out "ok" samples: the expected prediction is +1 for every row.
    ok_preds = classifier.predict(X_test)
    ok_accs.append(accuracy_score(np.ones_like(ok_preds), ok_preds))

    # The full "nok" set is scored against each fold's model; the expected
    # prediction is -1 for every row.
    nok_preds = classifier.predict(X_nok)
    nok_accs.append(accuracy_score(-np.ones_like(nok_preds), nok_preds))

# Each line prints a (mean, std) tuple taken over the folds.
print(f"OK Accuracy: {np.mean(ok_accs), np.std(ok_accs)}")
print(f"NOK Accuracy: {np.mean(nok_accs), np.std(nok_accs)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment