Skip to content

Instantly share code, notes, and snippets.

@eldrin
Last active September 20, 2018 17:19
Show Gist options
  • Save eldrin/713d5a97d218209108a3018b281c5003 to your computer and use it in GitHub Desktop.
Save eldrin/713d5a97d218209108a3018b281c5003 to your computer and use it in GitHub Desktop.
A simple script for visualizing given feature vectors (`.npy` format) by projecting them onto a 2-dimensional space with PCA.
from os.path import join, basename, splitext
import argparse
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
# Marker styles handed out round-robin, one per distinct label.
markers = ['o', '.', ',', 'x', '+', 'v', '^', '<', '>', 's', 'd']


def build_parser():
    """Return the command-line parser for this script.

    The parser takes three positional arguments: ``X`` (feature file),
    ``y`` (label file) and ``out_fn`` (output image filename).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("X", help="filename of the feature file (`.npy`) to visualize")
    parser.add_argument("y", help="filename of the label file (`.csv` or `.npy`) to visualize classes")
    parser.add_argument("out_fn", help="filename for the outputing image (`.pdf`)")
    return parser


def load_labels(path):
    """Load the label of every sample from ``path``.

    A ``.csv`` file is read as plain text with one label per line (the
    trailing newline is dropped, nothing else is parsed); a ``.npy`` file
    is read with ``np.load``.

    Raises:
        NotImplementedError: if the file extension is neither ``.csv``
            nor ``.npy``.
    """
    ext = splitext(path)[1]
    if ext == '.csv':
        with open(path) as f:
            return np.array([line.split('\n')[0] for line in f])
    if ext == '.npy':
        return np.load(path)
    raise NotImplementedError('{} is not supported!'.format(ext))


def marker_for(k):
    """Return the marker style for the ``k``-th label (0-based).

    Markers are assigned round-robin, so the first ``len(markers)`` labels
    all get distinct markers and the sequence only repeats after that.
    """
    return markers[k % len(markers)]


def scatter_by_label(z, y):
    """Draw one scatter series per distinct label on the current figure.

    ``z`` holds the 2-d coordinates of the samples (columns 0 and 1 are
    plotted) and ``y`` the label of each sample.
    """
    # np.unique returns the distinct labels in sorted order, so the
    # label -> marker assignment is the same on every run (iterating a
    # plain set gives an arbitrary order).
    for k, label in enumerate(np.unique(y)):
        idx = np.where(y == label)[0]
        plt.scatter(z[idx, 0], z[idx, 1], label=label,
                    marker=marker_for(k))


def main(argv=None):
    """Project the features to 2-d with PCA and save a per-label scatter plot.

    ``argv`` is the list of command-line arguments; ``None`` makes argparse
    read them from ``sys.argv``.

    Raises:
        ValueError: if the number of feature rows differs from the number
            of labels.
        NotImplementedError: if the label file has an unsupported extension.
    """
    args = build_parser().parse_args(argv)

    # load the feature file
    X = np.load(args.X)

    # load the label file
    y = load_labels(args.y)

    # check shape
    if X.shape[0] != len(y):
        raise ValueError('Feature & label should have same number of samples!')

    # run the PCA
    pca = PCA(2)
    z = pca.fit_transform(X)

    # visualize per label
    scatter_by_label(z, y)

    # save fig
    plt.legend()
    plt.tight_layout()
    plt.savefig(args.out_fn)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment