Last active
July 8, 2025 04:52
-
-
Save piyush01123/5229e673b4be04f58281d15905df228d to your computer and use it in GitHub Desktop.
Naive Bayes classification of MNIST-style digit images. Reference: http://web.iitd.ac.in/~bspanda/BY.pdf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Gaussian Naive Bayes classification of the scikit-learn 8x8 digit images.

Fits a per-class, per-pixel normal model on the training split, then labels
each test sample with the maximum-a-posteriori digit.
Reference: http://web.iitd.ac.in/~bspanda/BY.pdf
"""
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load the digits dataset and hold out 20% for evaluation (fixed seed).
digits = datasets.load_digits()
trainX, testX, trainY, testY = train_test_split(
    digits.images, digits.target, test_size=0.2, random_state=42
)
# mnist = datasets.fetch_openml('mnist_784', version=1, as_frame=False)
# X, y = mnist['data'], mnist['target'].astype(int)
# trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.2, random_state=42)

# Per-class Gaussian parameters: pixel-wise mean and std of each digit's images.
conditional_stats = []
for label in range(10):
    class_images = trainX[trainY == label]
    mu = class_images.mean(axis=0)
    sigma = class_images.std(axis=0)
    # Floor zero stds so the log-likelihood stays finite on constant pixels
    # (more robust than adding a tiny epsilon like 1e-6).
    sigma[sigma == 0] = 1e-3
    conditional_stats.append({"mean": mu, "std": sigma})

# Class priors P(digit), estimated from training-label frequencies.
prior_probs = [np.mean(trainY == label) for label in range(10)]
log_prior = np.log(prior_probs)


def predict(X):
    """Return the MAP digit (0-9) for every sample in X.

    Each sample is scored as log P(x | digit) + log P(digit), where the
    likelihood is a product of independent per-pixel Gaussians (summed in
    log space for numerical stability).
    """

    def score(x, label):
        mu = conditional_stats[label]["mean"]
        sigma = conditional_stats[label]["std"]
        # Sum of per-pixel Gaussian log-densities, plus the class log-prior.
        log_likelihood = -0.5 * np.sum(
            np.log(2 * np.pi * sigma**2) + ((x - mu) ** 2) / (sigma**2)
        )
        return log_likelihood + log_prior[label]

    return np.array(
        [np.argmax([score(x, label) for label in range(10)]) for x in X]
    )


# Predict and evaluate
predY = predict(testX)
print("Accuracy:", accuracy_score(testY, predY))
print("\nClassification Report:\n", classification_report(testY, predY))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Gaussian Naive Bayes on the 8x8 digits dataset using scipy's normal pdf.

Fixes relative to the original:
- np.product was deprecated and removed in NumPy 2.0; np.prod is the
  supported spelling.
- The frozen scipy.stats.norm distribution is hoisted out of the per-sample
  loop (it only depends on the class, not the test sample).
"""
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import scipy.stats

digits = datasets.load_digits()
trainX, testX, trainY, testY = train_test_split(digits.images, digits.target, test_size=.2)

# Class frequencies (unnormalised priors -- the shared normaliser cancels in argmax).
Y_freq = {y: np.count_nonzero(trainY == y) for y in range(10)}

# Pixel-wise mean/std per class, plus one frozen normal distribution per class,
# built once instead of once per (test sample, class) pair.
X_stats = {y: {"X_mean": np.mean(trainX[trainY == y], axis=0),
               "X_std": np.std(trainX[trainY == y], axis=0)}
           for y in range(10)}
class_dists = {y: scipy.stats.norm(X_stats[y]["X_mean"], X_stats[y]["X_std"])
               for y in range(10)}

# P[row, y] ~ P(x | y) * freq(y); argmax over y gives the predicted digit.
P = np.zeros((testX.shape[0], 10))
for row, x in enumerate(testX):
    probs = []
    for y in range(10):
        mean, std = X_stats[y]["X_mean"], X_stats[y]["X_std"]
        # Per-pixel likelihoods. Zero-variance pixels get hand-set values:
        # 1 when the test pixel equals the (constant) training value, 1e-4 otherwise.
        A = class_dists[y].pdf(x)
        B = np.where(np.all([x == mean, std == 0], axis=0), 1, A)
        C = np.where(np.all([x != mean, std == 0], axis=0), 1e-4, B)
        probs.append(np.prod(C) * Y_freq[y])
    P[row] = probs

pred = np.argmax(P, axis=1)
acc = np.sum(pred == testY) / testY.shape[0]
print(acc)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note: Accuracy obtained is 88.88%.