Last active
September 20, 2018 17:19
-
-
Save eldrin/713d5a97d218209108a3018b281c5003 to your computer and use it in GitHub Desktop.
A simple script for visualizing given feature vectors (`.npy` format) by projecting them onto a 2-dimensional space with PCA.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
from os.path import join, basename, splitext

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
# Marker shapes handed out to the class labels, in order; once every shape
# has been used the sequence starts over from the first one.
MARKERS = ['o', '.', ',', 'x', '+', 'v', '^', '<', '>', 's', 'd']


def build_parser():
    """Return the command-line parser: feature file, label file, output image."""
    parser = argparse.ArgumentParser()
    parser.add_argument("X", help="filename of the feature file (`.npy`) to visualize")
    parser.add_argument("y", help="filename of the label file (`.csv` or `.npy`) to visualize classes")
    parser.add_argument("out_fn", help="filename for the outputing image (`.pdf`)")
    return parser


def load_labels(path):
    """Load the per-sample labels stored at ``path``.

    A ``.csv`` file is read as one label per line (the whole line, minus its
    trailing newline, is the label); a ``.npy`` file is loaded with
    ``np.load``.

    Returns:
        A numpy array of labels.

    Raises:
        NotImplementedError: if the file extension is neither ``.csv`` nor
            ``.npy``.
    """
    ext = splitext(path)[1]
    if ext == '.csv':
        with open(path) as f:
            return np.array([line.rstrip('\n') for line in f])
    if ext == '.npy':
        return np.load(path)
    raise NotImplementedError('{} is not supported!'.format(ext))


def pick_marker(k):
    """Return the marker for the ``k``-th class, cycling through ``MARKERS``.

    The index is ``k % len(MARKERS)`` so that the first ``len(MARKERS)``
    classes all receive distinct shapes. (The operands of the modulo must not
    be swapped: ``len(MARKERS) % (k + 1)`` repeats shapes almost immediately.)
    """
    return MARKERS[k % len(MARKERS)]


def main(argv=None):
    """Project the features to 2-D with PCA and save a per-class scatter plot.

    Args:
        argv: argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        ValueError: if the number of feature rows differs from the number of
            labels.
    """
    args = build_parser().parse_args(argv)

    # load the feature file and the label file
    X = np.load(args.X)
    y = load_labels(args.y)

    # check shape
    if X.shape[0] != len(y):
        raise ValueError('Feature & label should have same number of samples!')

    # run the PCA, keeping the first two components
    z = PCA(2).fit_transform(X)

    # visualize per label; np.unique returns the labels sorted, so the
    # label -> marker assignment and legend order are the same on every run
    # (iterating a plain set gives no such guarantee)
    for k, label in enumerate(np.unique(y)):
        members = y == label
        plt.scatter(z[members, 0], z[members, 1], label=label,
                    marker=pick_marker(k))

    # save fig
    plt.legend()
    plt.tight_layout()
    plt.savefig(args.out_fn)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment