Skip to content

Instantly share code, notes, and snippets.

@eileen-code4fun
Created June 11, 2021 14:03
Show Gist options
  • Save eileen-code4fun/d313de50d86d46b320684b7d920c758a to your computer and use it in GitHub Desktop.
Save eileen-code4fun/d313de50d86d46b320684b7d920c758a to your computer and use it in GitHub Desktop.
CIFAR10 PCA
import numpy as np
from sklearn import decomposition
# Reuse the same ds from Tensorflow Dataset.
# NOTE(review): `ds` and `tf` are not defined in this snippet; they are
# expected to be provided by earlier code -- confirm before running standalone.
def _flatten_image(image, label):
    """Reshape one image tensor into a 1-D vector; the label is dropped."""
    return tf.reshape(image, [-1])


def _label_only(image, label):
    """Return just the label of an (image, label) pair."""
    return label


# Flatten every image of both splits and stack them, train rows first.
train_data = list(ds['train'].map(_flatten_image))
test_data = list(ds['test'].map(_flatten_image))
X = tf.concat([train_data, test_data], axis=0).numpy()
print(X.shape)
# Output: (60000, 3072)

# Fit PCA on the stacked matrix, then project that same matrix onto the
# 300 retained components.
pca = decomposition.PCA(n_components=300)
X = pca.fit(X).transform(X)
print(X.shape)
# Output: (60000, 300)

# Labels are gathered in a second pass over the same splits, again train
# first, so they line up with the rows of X as long as `ds` iterates in the
# same order both times.
train_labels = list(ds['train'].map(_label_only))
test_labels = list(ds['test'].map(_label_only))
labels = tf.concat([train_labels, test_labels], axis=0).numpy()
def csv_dump(X, labels, filename):
    """Write labelled feature rows to ``filename`` as comma-separated text.

    Each output line holds the label followed by every element of the
    matching row of ``X``, all separated by commas and terminated by a
    newline. Rows and labels are paired positionally with ``zip``, so
    writing stops at the shorter of the two.

    Args:
        X: Iterable of rows; each row is an iterable of values.
        labels: Iterable of labels, one per row of ``X``.
        filename: Path of the file to create (an existing file is
            overwritten).
    """
    with open(filename, 'w') as f:
        for x, label in zip(X, labels):
            # The label must be its own column: join it together with the
            # features so a comma separates it from the first feature value
            # (writing it separately fused e.g. label 3 and 1.5 into "31.5").
            fields = ['{}'.format(label)]
            fields.extend(str(e) for e in x)
            f.write(','.join(fields))
            f.write('\n')
# Write the first 50000 rows as the training file and the remaining rows as
# the test file, each paired with the matching slice of labels.
for rows, row_labels, out_path in (
    (X[:50000, :], labels[:50000], 'train_pca.csv'),
    (X[50000:, :], labels[50000:], 'test_pca.csv'),
):
    csv_dump(rows, row_labels, out_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment