Cute fish out of MNIST Handwritten Dataset [OC]
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
PCA dimensionality reduction of the handwritten digits dataset from 784 features to 2, with normalization and visualization.
def main():
    """Project the MNIST training digits onto two PCA components and plot them.

    Reads the training CSV, divides the pixel columns by a normalization
    constant, fits a whitened two-component PCA on the result, and draws a
    scatter plot in which every sample is annotated with its digit label
    inside a box coloured per digit. The figure is shown at the end.
    """
    # Divisor applied to every pixel column (presumably the maximum 8-bit
    # pixel intensity, so values end up in [0, 1] -- confirm against the CSV).
    normalization_constant = 255

    # Loading Train Dataset
    dataFrame_train = pd.read_csv('a3_datasets/datasets/digits/train.csv')
    # Every column after the first is treated as pixel data; this assumes
    # 'label' is the first column of the CSV -- TODO confirm.
    train = np.array(dataFrame_train.iloc[0:, 1:] / normalization_constant)
    train_label = np.array(dataFrame_train['label'])

    # Reduce each sample to 2 whitened principal components.
    pca = PCA(2, whiten=True)
    train = pca.fit_transform(train)

    # One box colour per digit so the classes can be told apart in the plot.
    label_color = {1: 'r', 2: 'b', 3: 'g', 4: 'c', 5: 'm', 6: 'y',
                   7: '0.75', 8: 'w', 9: '#87fc70', 0: '#ffc0cb'}

    plt.scatter(train[:, 0], train[:, 1])
    for label, x, y in zip(train_label, train[:, 0], train[:, 1]):
        # Draw the digit directly on top of its point (zero offset), boxed
        # in the colour assigned to that digit.
        plt.annotate(
            str(label),
            xy=(x, y), xytext=(0, 0),
            textcoords='offset points', ha='right', va='bottom',
            bbox=dict(boxstyle='round, pad=0.5', fc=label_color[label], alpha=0.5),
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

    # Graph
    plt.title('MNIST Dataset reduced to 2 Components using PCA')
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    # Without this the figure is built but never displayed.
    plt.show()
# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
filename: 2000.png
Digits 0 and 1 are easily distinguishable; on the other hand,
the only thing stopping me from calling 2 and 3 distinguishable as well
is the stark contrast in the colors I've set.
