Created
October 9, 2015 15:58
-
-
Save edubart/96020963f91283e26c2a to your computer and use it in GitHub Desktop.
Gaussian Naive Bayes Classifier
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'torch' | |
_ = require 'moses' | |
csv2tensor = require 'csv2tensor' | |
-- Load the CSV into a 2-D tensor. Only the first value returned by
-- csv2tensor.load is kept.
local dataset = csv2tensor.load('pima-indians-diabetes.data')

-- Containers filled in by the training passes further down, keyed by label.
local classCount = {}     -- label -> row counter
local classFeatures = {}  -- label -> tensor holding that label's feature rows
local pXC = {}            -- label -> { mean = ..., std = ... } per feature
local pC = {}             -- label -> fraction of training rows with that label
local numClasses = 0

-- Row-wise split: the leading `trainRatio` share of rows is used for
-- training, the remaining rows for testing. The rows are taken in file
-- order; nothing is shuffled here.
local trainRatio = 0.67
local dataSize = dataset:size(1)
local trainSize = math.floor(trainRatio * dataSize)
local testSize = dataSize - trainSize
local trainData = dataset:narrow(1, 1, trainSize)
local testData = dataset:narrow(1, trainSize + 1, testSize)

-- Column-wise split: every column except the last is a feature, and the
-- last column is the label.
local featuresSize = trainData:size(2) - 1
local labelColumn = featuresSize + 1
local featuresTrain = trainData:narrow(2, 1, featuresSize)
local featuresTest = testData:narrow(2, 1, featuresSize)
local labelsTrain = trainData:select(2, labelColumn)
local labelsTest = testData:select(2, labelColumn)

print(('split %d rows into train=%d and test=%d rows'):format(dataSize, trainSize, testSize))
--- Gaussian probability density of `x` under N(mean, std^2).
-- The division steps use torch.cdiv, so the result is computed
-- element-wise over the arguments.
-- @param x     value(s) at which the density is evaluated
-- @param mean  mean of the distribution
-- @param std   standard deviation of the distribution (a zero entry
--              leads to a division by zero; it is not guarded here)
-- @return exp(-(x - mean)^2 / (2 * std^2)) / (std * sqrt(2 * pi))
function calcProbability(x, mean, std)
  local squaredDeviation = torch.pow(x - mean, 2)
  local twiceVariance = torch.pow(std, 2) * 2
  local exponent = -torch.cdiv(squaredDeviation, twiceVariance)
  local normalizer = std * torch.sqrt(2 * math.pi)
  return torch.cdiv(torch.exp(exponent), normalizer)
end
-- Pass 1: tally how many training rows carry each label.
for row = 1, trainSize do
  local label = labelsTrain[row]
  local seen = classCount[label]
  if seen == nil then
    seen = 0
  end
  classCount[label] = seen + 1
end

-- Pass 2: allocate one (rows x features) tensor per label. classCount is
-- then reused as a write cursor, so each entry is reset to 1 (only existing
-- keys are reassigned, which is permitted while traversing with pairs).
for label, rowCount in pairs(classCount) do
  numClasses = numClasses + 1
  classFeatures[label] = torch.Tensor(rowCount, featuresSize)
  classCount[label] = 1
end

-- Pass 3: copy every training row into the next free slot of its label's
-- tensor and advance that label's cursor.
for row = 1, trainSize do
  local label = labelsTrain[row]
  local slot = classCount[label]
  classFeatures[label][slot]:copy(featuresTrain[row])
  classCount[label] = slot + 1
end

-- Pass 4: per label, reduce its rows to per-feature mean and standard
-- deviation, and record the share of training rows that carry the label.
for label, rows in pairs(classFeatures) do
  local meanRow = torch.mean(rows, 1)
  local stdRow = torch.std(rows, 1)
  pXC[label] = { mean = meanRow[1], std = stdRow[1] }
  pC[label] = rows:size(1) / trainSize
end
-- Classify every test row and report the percentage predicted correctly.
--
-- Each class is scored with the Naive Bayes decision rule in log space:
--   score(class) = log P(class) + sum_i log p(x_i | class)
-- where P(class) is the prior stored in pC and p(x_i | class) is the
-- Gaussian density from calcProbability using that class's mean/std in pXC.
-- Summing logs instead of multiplying raw densities selects the same class
-- while avoiding floating-point underflow of the product.
local accuracy = 0
for i = 1, featuresTest:size(1) do
  local sample = featuresTest[i]
  local out = nil
  local bestScore = -math.huge
  for class, prior in pairs(pC) do
    local fp = pXC[class]
    local densities = calcProbability(sample, fp.mean, fp.std)
    local score = math.log(prior) + torch.sum(torch.log(densities))
    -- A NaN score (e.g. from a zero standard deviation) never compares
    -- greater, so such a class is skipped; if no class qualifies, `out`
    -- stays nil and the row is counted as a miss.
    if score > bestScore then
      out = class
      bestScore = score
    end
  end
  if out == labelsTest[i] then
    accuracy = accuracy + 1
  end
end
-- Convert the number of correct predictions into a percentage.
accuracy = 100 * accuracy / testSize
print('accuracy:', accuracy)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment