Created
October 25, 2017 19:56
-
-
Save sergiks/f3197087b53eeca153b9c4cad3e25a7e to your computer and use it in GitHub Desktop.
Naive Bayes attempt to classify boolean data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Each feature vector is 512 values, each 1 or 0; each label is 1 or 0.
The data represents bio research on components added to a mixture;
the components are all independent of one another.
The outcome is whether a sample passes some threshold for further research.
""" | |
import numpy as np | |
from sklearn.naive_bayes import GaussianNB | |
def getData(features_path="bytes/data600.dat", labels_path="bytes/labels600.dat",
            split=599900, n_features=512):
    """Load the raw feature/label dumps and split them into train and test parts.

    Both files are read as flat streams of unsigned bytes. The feature stream
    is reshaped into rows of ``n_features`` columns; the label stream holds one
    byte per row.

    Args:
        features_path: Path of the raw uint8 feature file.
        labels_path: Path of the raw uint8 label file.
        split: Row index at which the data is cut; rows before it form the
            training part, rows from it onward form the test part.
        n_features: Number of columns in one feature row.

    Returns:
        A tuple ``(train_features, train_labels, test_features, test_labels)``.

    Raises:
        ValueError: If the feature file does not hold a whole number of rows
            (raised by the reshape) or if the number of feature rows differs
            from the number of labels.
    """
    features = np.fromfile(features_path, dtype=np.uint8).reshape(-1, n_features)
    labels = np.fromfile(labels_path, dtype=np.uint8)

    # Without this check a truncated or mismatched file pair would silently
    # produce feature rows paired with the wrong (or no) labels.
    if len(features) != len(labels):
        raise ValueError(
            "feature/label count mismatch: "
            f"{len(features)} feature rows vs {len(labels)} labels"
        )

    return features[:split], labels[:split], features[split:], labels[split:]
# Load the train / test partitions produced by getData()
Xtrain, Ytrain, Xtest, Ytest = getData()

# Build the Gaussian naive Bayes classifier and fit it on the training part
# (fit() hands back the estimator, so construction and training are chained)
naiveBayes = GaussianNB().fit(Xtrain, Ytrain)

# Predict a label for every held-out sample
prediction = naiveBayes.predict(Xtest)

# Score: a sample counts as a miss when exactly one of
# (predicted label, expected label) is non-zero
mismatched = np.logical_xor(prediction, Ytest)
n_test = len(Ytest)
n_missed = np.count_nonzero(mismatched)
accuracy = (n_test - n_missed) / n_test

# Report the accuracy and the first 20 expected vs. predicted labels
print("Accuracy:", accuracy)
print("Expectation:", Ytest[:20],"Reality:", prediction[:20])
""" | |
Accuracy: 0.47 | |
Expectation: [1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0] | |
Reality: [1 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1] | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment