Skip to content

Instantly share code, notes, and snippets.

@edubart
Created October 9, 2015 15:58
Show Gist options
  • Save edubart/96020963f91283e26c2a to your computer and use it in GitHub Desktop.
Save edubart/96020963f91283e26c2a to your computer and use it in GitHub Desktop.
Gaussian Naive Bayes Classifier
require 'torch'
_ = require 'moses'
csv2tensor = require 'csv2tensor'
-- Load the CSV file into a tensor (one row per sample).
local dataset = csv2tensor.load('pima-indians-diabetes.data')

-- Split by position: the leading rows are used for training, the
-- remaining rows for testing. No shuffling is performed.
local trainRatio = 0.67
local dataSize = dataset:size(1)
local trainSize = torch.floor(trainRatio * dataSize)
local testSize = dataSize - trainSize
local trainData = dataset:narrow(1, 1, trainSize)
local testData = dataset:narrow(1, 1 + trainSize, testSize)

-- Every column except the last is treated as a feature; the last column
-- is treated as the class label.
local featuresSize = trainData:size(2) - 1

-- Returns (feature columns, label column) for the given block of rows.
local function splitColumns(rows)
  local labelColumn = featuresSize + 1
  return rows:narrow(2, 1, featuresSize), rows:select(2, labelColumn)
end

local featuresTrain, labelsTrain = splitColumns(trainData)
local featuresTest, labelsTest = splitColumns(testData)

-- Per-class state, keyed by class label and filled in by the training code.
local classCount, classFeatures = {}, {}
local pXC, pC = {}, {}
local numClasses = 0

print(('split %d rows into train=%d and test=%d rows'):format(dataSize, trainSize, testSize))
--- Evaluate the Gaussian probability density element-wise.
-- Computes exp(-(x - mean)^2 / (2 * std^2)) / (std * sqrt(2 * pi)) for each
-- element of the inputs.
-- @param x    tensor of observed values
-- @param mean tensor of means, same size as x
-- @param std  tensor of standard deviations, same size as x
-- @return tensor of densities, same size as x
function calcProbability(x, mean, std)
  -- A zero standard deviation (a feature that is constant within a class)
  -- would divide by zero below and produce NaN/inf densities, so floor it
  -- at a tiny positive value. Inputs with std above the floor are unaffected.
  local safeStd = torch.clamp(std, 1e-9, math.huge)
  local twoVariance = torch.pow(safeStd, 2) * 2
  local exponent = -torch.cdiv(torch.pow(x - mean, 2), twoVariance)
  return torch.cdiv(torch.exp(exponent), safeStd * math.sqrt(2 * math.pi))
end
-- Pass 1: tally how many training rows carry each class label.
for row = 1, trainSize do
  local label = labelsTrain[row]
  local seen = classCount[label]
  if seen == nil then
    classCount[label] = 1
  else
    classCount[label] = seen + 1
  end
end

-- Pass 2: allocate one (rows x features) tensor per class, then reset the
-- tally so it can double as the next free row index during pass 3.
for label, rows in pairs(classCount) do
  numClasses = numClasses + 1
  classFeatures[label] = torch.Tensor(rows, featuresSize)
  classCount[label] = 1
end

-- Pass 3: copy every training row into the tensor of its class.
for row = 1, trainSize do
  local label = labelsTrain[row]
  local slot = classCount[label]
  classCount[label] = slot + 1
  classFeatures[label][slot] = featuresTrain[row]
end

-- Pass 4: per class, store the per-feature mean and standard deviation
-- (pXC) and the fraction of training rows belonging to the class (pC).
for label, samples in pairs(classFeatures) do
  local columnMean = torch.mean(samples, 1)
  local columnStd = torch.std(samples, 1)
  pXC[label] = {
    mean = columnMean:select(1, 1),
    std = columnStd:select(1, 1)
  }
  pC[label] = samples:size(1) / trainSize
end
-- Classify every test row and report the percentage predicted correctly.
--
-- For each class the score is the log-posterior up to a constant:
--   log P(class) + sum_i log p(x_i | class)
-- The class prior is included (the previous code started every score at 1,
-- which silently ignored pC), and the sum of logs replaces the raw product
-- of densities so that many small factors cannot underflow to zero.
local accuracy = 0
for i = 1, featuresTest:size(1) do
  local sample = featuresTest[i]
  local out = nil
  local outScore = -math.huge
  for class, prior in pairs(pC) do
    local fp = pXC[class]
    local logLikelihood = torch.sum(torch.log(calcProbability(sample, fp.mean, fp.std)))
    local score = math.log(prior) + logLikelihood
    -- A score of -inf or NaN never wins; if every class scores that way,
    -- `out` stays nil and the row is counted as misclassified.
    if score > outScore then
      out = class
      outScore = score
    end
  end
  if out == labelsTest[i] then
    accuracy = accuracy + 1
  end
end
accuracy = 100 * accuracy / testSize
print('accuracy:', accuracy)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment