eldrin · September 20, 2018 17:19
diff --git a/visualize_pca_2d.py b/visualize_pca_2d.py
 from os.path import join, basename, splitext
 import argparse

 import numpy as np
 from sklearn.decomposition import PCA
 import matplotlib.pyplot as plt

 # Script: project a feature matrix onto its first two principal components
 # and save a scatter plot with one marker/colour series per class label.

 # setup parser
 parser = argparse.ArgumentParser()
 parser.add_argument("X", help="filename of the feature file (`.npy`) to visualize")
 parser.add_argument("y", help="filename of the label file (`.csv` or `.npy`) to visualize classes")
 parser.add_argument("out_fn", help="filename for the outputing image (`.pdf`)")
 args = parser.parse_args()

 # load the feature file
 X = np.load(args.X)

 # load the label file (one label per line for `.csv`, an array for `.npy`)
 ext = splitext(args.y)[1]
 if ext == '.csv':
     with open(args.y) as f:
         # keep everything before the line terminator as the label string
         y = np.array([line.split('\n')[0] for line in f])
 elif ext == '.npy':
     y = np.load(args.y)
 else:
     raise NotImplementedError('{} is not supported!'.format(ext))

 # check shape: every sample (row of X) needs exactly one label
 if X.shape[0] != len(y):
     raise ValueError('Feature & label should have same number of samples!')

 # run the PCA (keep the first two components for a 2-D plot)
 pca = PCA(2)
 z = pca.fit_transform(X)

 # markers
 markers = ['o', '.', ',', 'x', '+', 'v', '^', '<', '>', 's', 'd']

 # visualize per label
 # `np.unique` returns the distinct labels in sorted order, so the legend
 # order and the label -> marker assignment are the same on every run
 # (iterating a `set` of strings is not order-stable between runs).
 for k, label in enumerate(np.unique(y)):
     idx = np.where(y == label)[0]
     # cycle through the marker list; it wraps around only once there are
     # more classes than markers
     plt.scatter(z[idx, 0], z[idx, 1], label=label,
                 marker=markers[k % len(markers)])

 # save fig
 plt.legend()
 plt.tight_layout()
 plt.savefig(args.out_fn)
	from os.path import join, basename, splitext
	import argparse

	import numpy as np
	from sklearn.decomposition import PCA
	import matplotlib.pyplot as plt

	# Script: project a feature matrix onto its first two principal components
	# and save a scatter plot with one marker/colour series per class label.

	# setup parser
	parser = argparse.ArgumentParser()
	parser.add_argument("X", help="filename of the feature file (`.npy`) to visualize")
	parser.add_argument("y", help="filename of the label file (`.csv` or `.npy`) to visualize classes")
	parser.add_argument("out_fn", help="filename for the outputing image (`.pdf`)")
	args = parser.parse_args()

	# load the feature file
	X = np.load(args.X)

	# load the label file (one label per line for `.csv`, an array for `.npy`)
	ext = splitext(args.y)[1]
	if ext == '.csv':
	    with open(args.y) as f:
	        # keep everything before the line terminator as the label string
	        y = np.array([line.split('\n')[0] for line in f])
	elif ext == '.npy':
	    y = np.load(args.y)
	else:
	    raise NotImplementedError('{} is not supported!'.format(ext))

	# check shape: every sample (row of X) needs exactly one label
	if X.shape[0] != len(y):
	    raise ValueError('Feature & label should have same number of samples!')

	# run the PCA (keep the first two components for a 2-D plot)
	pca = PCA(2)
	z = pca.fit_transform(X)

	# markers
	markers = ['o', '.', ',', 'x', '+', 'v', '^', '<', '>', 's', 'd']

	# visualize per label
	# `np.unique` returns the distinct labels in sorted order, so the legend
	# order and the label -> marker assignment are the same on every run
	# (iterating a `set` of strings is not order-stable between runs).
	for k, label in enumerate(np.unique(y)):
	    idx = np.where(y == label)[0]
	    # cycle through the marker list; it wraps around only once there are
	    # more classes than markers
	    plt.scatter(z[idx, 0], z[idx, 1], label=label,
	                marker=markers[k % len(markers)])

	# save fig
	plt.legend()
	plt.tight_layout()
	plt.savefig(args.out_fn)