Skip to content

Instantly share code, notes, and snippets.

@piyush01123
Last active March 16, 2023 01:37
Show Gist options
  • Save piyush01123/5229e673b4be04f58281d15905df228d to your computer and use it in GitHub Desktop.
Save piyush01123/5229e673b4be04f58281d15905df228d to your computer and use it in GitHub Desktop.
Gaussian Naive Bayes classification of MNIST-style handwritten-digit images (scikit-learn `load_digits` dataset) http://web.iitd.ac.in/~bspanda/BY.pdf
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import scipy.stats
# Load the scikit-learn digits dataset and hold out 20% of it for testing.
# No random_state is passed, so the split (and therefore the reported
# accuracy) differs from run to run.
digits = datasets.load_digits()
trainX, testX, trainY, testY = train_test_split(
    digits.images, digits.target, test_size=.2
)

# Number of training samples per class; the classifier uses these counts as
# an unnormalised class prior.
Y_freq = {y: np.count_nonzero(trainY == y) for y in range(10)}

# Per-class, per-pixel mean and standard deviation: the parameters of the
# independent Gaussians that model each pixel under Naive Bayes.
X_stats = {}
for y in range(10):
    class_images = trainX[trainY == y]  # apply the class mask only once
    X_stats[y] = {
        "X_mean": np.mean(class_images, axis=0),
        "X_std": np.std(class_images, axis=0),
    }
# Score every test image against every class.
#
# P[i, y] is the log of (likelihood of image i under class y) * (class count),
# i.e. the log-posterior up to an additive constant.  Working in log space
# avoids the underflow that multiplying one density per pixel can cause, and
# avoids `np.product`, which was removed in NumPy 2.0.
P = np.zeros((testX.shape[0], 10))
for y in range(10):
    mean, std = X_stats[y]["X_mean"], X_stats[y]["X_std"]

    # Pixels that never vary within this class have std == 0, for which the
    # Gaussian density is undefined.  Evaluate them with a placeholder scale
    # (the result is overwritten below) so no NaN is produced.
    zero_var = std == 0
    safe_std = np.where(zero_var, 1.0, std)
    log_like = scipy.stats.norm(mean, safe_std).logpdf(testX)

    # Zero-variance pixels: likelihood 1 when the pixel equals the class
    # mean, otherwise a small penalty of 1e-4.
    degenerate = np.where(testX == mean, 0.0, np.log(1e-4))
    log_like = np.where(zero_var, degenerate, log_like)

    # Naive Bayes independence assumption: sum the per-pixel log-likelihoods,
    # then add the log of the (unnormalised) class prior.
    per_image = log_like.reshape(testX.shape[0], -1).sum(axis=1)
    P[:, y] = per_image + np.log(Y_freq[y])

# Predict the highest-scoring class and report the fraction predicted correctly.
pred = np.argmax(P, axis=1)
acc = np.mean(pred == testY)
print(acc)
@piyush01123
Copy link
Author

Note: the accuracy obtained is about 88.88%; the exact value varies between runs because the train/test split is not seeded.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment