gvanhorn38 · October 11, 2018 16:40
diff --git a/inat_classifier_from_prelogits.py b/inat_classifier_from_prelogits.py
 import json
 import os

 import numpy as np
 from sklearn.metrics import accuracy_score
 from sklearn.svm import LinearSVC

 # File Paths
 # Each path is built from DATA_FOLDER and checked right away; a missing file
 # stops the script with an AssertionError carrying the message shown.
 DATA_FOLDER = ''  # Fill in

 TRAIN_DATASET_FP = os.path.join(DATA_FOLDER, 'train2018.json')
 assert os.path.exists(TRAIN_DATASET_FP), "Train json file not found"

 VAL_DATASET_FP = os.path.join(DATA_FOLDER, 'val2018.json')
 assert os.path.exists(VAL_DATASET_FP), "Validation json file not found"

 TRAIN_FEATURES_FP = os.path.join(DATA_FOLDER, 'train2018_prelogits.npz')
 assert os.path.exists(TRAIN_FEATURES_FP), "Train feature file not found"

 VAL_FEATURES_FP = os.path.join(DATA_FOLDER, 'val2018_prelogits.npz')
 assert os.path.exists(VAL_FEATURES_FP), "Validation feature file not found"

 # Read the train and validation annotation files into dictionaries.
 # Data format can be found here: https://github.com/visipedia/inat_comp#annotation-format
 with open(TRAIN_DATASET_FP) as f:
     train_dataset = json.load(f)
 with open(VAL_DATASET_FP) as f:
     val_dataset = json.load(f)

 # Build image id -> class label lookups from the 'annotations' entries.
 # The feature files contain image ids and not class labels.
 train_image_id_to_class_id = dict(
     (anno['image_id'], anno['category_id'])
     for anno in train_dataset['annotations']
 )
 val_image_id_to_class_id = dict(
     (anno['image_id'], anno['category_id'])
     for anno in val_dataset['annotations']
 )

 # Load in the features extracted from a NN.
 # Two arrays are read from each .npz archive: 'PreLogits' (used as the feature
 # matrix) and 'ids' (image ids; presumably one per feature row, in the same
 # order -- verify against the feature extraction script).
 # The archives are opened with `with` so the underlying files are closed once
 # the arrays have been read.

 # Train Features
 with np.load(TRAIN_FEATURES_FP) as train_features:
     train_X = train_features['PreLogits']
     # Builtin `int` replaces the `np.int` alias, which NumPy 1.24 removed.
     train_image_ids = train_features['ids'].astype(int)
 # Translate each image id into its class label.
 train_y = np.array([train_image_id_to_class_id[image_id] for image_id in train_image_ids])

 # Val Features
 with np.load(VAL_FEATURES_FP) as val_features:
     val_X = val_features['PreLogits']
     val_image_ids = val_features['ids'].astype(int)
 val_y = np.array([val_image_id_to_class_id[image_id] for image_id in val_image_ids])

 # Train a linear SVM on the train features and labels.
 model = LinearSVC(
     penalty='l2',
     loss='squared_hinge',
     dual=False,
     tol=0.001,
     C=1.0,
     multi_class='ovr',
     max_iter=100,
     verbose=1
 )

 model.fit(train_X, train_y)

 # Predict labels for the validation features and report the accuracy.
 val_pred_y = model.predict(val_X)

 # The closing parenthesis of print() was missing, which made the whole
 # script a SyntaxError.
 print("Accuracy: %0.3f" % (accuracy_score(val_y, val_pred_y),))
	import json
	import os

	import numpy as np
	from sklearn.metrics import accuracy_score
	from sklearn.svm import LinearSVC

	# File Paths
	# Each path is built from DATA_FOLDER and checked right away; a missing file
	# stops the script with an AssertionError carrying the message shown.
	DATA_FOLDER = ''  # Fill in

	TRAIN_DATASET_FP = os.path.join(DATA_FOLDER, 'train2018.json')
	assert os.path.exists(TRAIN_DATASET_FP), "Train json file not found"

	VAL_DATASET_FP = os.path.join(DATA_FOLDER, 'val2018.json')
	assert os.path.exists(VAL_DATASET_FP), "Validation json file not found"

	TRAIN_FEATURES_FP = os.path.join(DATA_FOLDER, 'train2018_prelogits.npz')
	assert os.path.exists(TRAIN_FEATURES_FP), "Train feature file not found"

	VAL_FEATURES_FP = os.path.join(DATA_FOLDER, 'val2018_prelogits.npz')
	assert os.path.exists(VAL_FEATURES_FP), "Validation feature file not found"

	# Load in the train and validation datasets
	# Data format can be found here: https://github.com/visipedia/inat_comp#annotation-format
	# The bodies of the `with` statements must be indented; they had been
	# flattened to the same level as the `with` lines.
	with open(TRAIN_DATASET_FP) as f:
	    train_dataset = json.load(f)
	with open(VAL_DATASET_FP) as f:
	    val_dataset = json.load(f)

	# Make a map from image id to the class label, taken from the 'annotations'
	# entries. The feature files contain image ids and not class labels.
	train_image_id_to_class_id = {anno['image_id']: anno['category_id'] for anno in train_dataset['annotations']}
	val_image_id_to_class_id = {anno['image_id']: anno['category_id'] for anno in val_dataset['annotations']}

	# Load in the features extracted from a NN.
	# Two arrays are read from each .npz archive: 'PreLogits' (used as the feature
	# matrix) and 'ids' (image ids; presumably one per feature row, in the same
	# order -- verify against the feature extraction script).
	# The archives are opened with `with` so the underlying files are closed once
	# the arrays have been read.

	# Train Features
	with np.load(TRAIN_FEATURES_FP) as train_features:
	    train_X = train_features['PreLogits']
	    # Builtin `int` replaces the `np.int` alias, which NumPy 1.24 removed.
	    train_image_ids = train_features['ids'].astype(int)
	# Translate each image id into its class label.
	train_y = np.array([train_image_id_to_class_id[image_id] for image_id in train_image_ids])

	# Val Features
	with np.load(VAL_FEATURES_FP) as val_features:
	    val_X = val_features['PreLogits']
	    val_image_ids = val_features['ids'].astype(int)
	val_y = np.array([val_image_id_to_class_id[image_id] for image_id in val_image_ids])

	# Train a linear SVM on the train features and labels.
	model = LinearSVC(
	    penalty='l2',
	    loss='squared_hinge',
	    dual=False,
	    tol=0.001,
	    C=1.0,
	    multi_class='ovr',
	    max_iter=100,
	    verbose=1
	)

	model.fit(train_X, train_y)

	# Predict labels for the validation features and report the accuracy.
	val_pred_y = model.predict(val_X)

	# The closing parenthesis of print() was missing, which made the whole
	# script a SyntaxError.
	print("Accuracy: %0.3f" % (accuracy_score(val_y, val_pred_y),))