Created
October 11, 2018 16:40
-
-
Save gvanhorn38/a24c9269c260f6a396b814d62524e489 to your computer and use it in GitHub Desktop.
Simple iNaturalist Classifier from Features
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
# File Paths
DATA_FOLDER = ''  # Fill in


def _require_file(path, message):
    """Return `path` unchanged if it exists, otherwise raise FileNotFoundError.

    An explicit raise is used instead of `assert` so the check still runs
    when Python is started with -O (which strips assert statements).
    """
    if not os.path.exists(path):
        raise FileNotFoundError("%s: %r" % (message, path))
    return path


# Dataset annotation files (JSON) for the train and validation splits.
TRAIN_DATASET_FP = _require_file(
    os.path.join(DATA_FOLDER, 'train2018.json'),
    "Train json file not found",
)
VAL_DATASET_FP = _require_file(
    os.path.join(DATA_FOLDER, 'val2018.json'),
    "Validation json file not found",
)

# Feature archives (.npz) for the train and validation splits.
TRAIN_FEATURES_FP = _require_file(
    os.path.join(DATA_FOLDER, 'train2018_prelogits.npz'),
    "Train feature file not found",
)
VAL_FEATURES_FP = _require_file(
    os.path.join(DATA_FOLDER, 'val2018_prelogits.npz'),
    "Validation feature file not found",
)
# Load in the train and validation datasets
# Data format can be found here: https://github.com/visipedia/inat_comp#annotation-format
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default locale encoding.
with open(TRAIN_DATASET_FP, encoding='utf-8') as f:
    train_dataset = json.load(f)
with open(VAL_DATASET_FP, encoding='utf-8') as f:
    val_dataset = json.load(f)

# Make a map from image id to the class label. The feature files contain image ids and not class labels
train_image_id_to_class_id = {
    anno['image_id']: anno['category_id']
    for anno in train_dataset['annotations']
}
val_image_id_to_class_id = {
    anno['image_id']: anno['category_id']
    for anno in val_dataset['annotations']
}
# Load in the features extracted from a NN
# Each .npz archive is read for two arrays: 'PreLogits' (the feature matrix)
# and 'ids' (the image id of each row). The archives are opened in a `with`
# block so the underlying file handle is closed once the arrays are read.
# `astype(int)` replaces the removed `np.int` alias (it was the builtin int).

# Train Features
with np.load(TRAIN_FEATURES_FP) as train_features:
    train_X = train_features['PreLogits']
    train_image_ids = train_features['ids'].astype(int)
# Look up the class label of every feature row through its image id.
train_y = np.array([train_image_id_to_class_id[image_id] for image_id in train_image_ids])

# Val Features
with np.load(VAL_FEATURES_FP) as val_features:
    val_X = val_features['PreLogits']
    val_image_ids = val_features['ids'].astype(int)
val_y = np.array([val_image_id_to_class_id[image_id] for image_id in val_image_ids])
# Train a linear SVM
# One-vs-rest linear SVM with an L2 penalty and squared hinge loss.
# NOTE(review): dual=False presumably chosen because there are many more
# samples than feature dimensions - confirm against the feature shapes.
model = LinearSVC(
    penalty='l2',
    loss='squared_hinge',
    dual=False,
    tol=0.001,
    C=1.0,
    multi_class='ovr',
    max_iter=100,
    verbose=1
)
model.fit(train_X, train_y)

# Evaluate on the validation features and report top-1 accuracy.
val_pred_y = model.predict(val_X)
print("Accuracy: %0.3f" % (accuracy_score(val_y, val_pred_y),))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment