Created
March 24, 2013 23:23
-
-
Save sir-deenicus/5234043 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Per-feature statistic stored for one class.
type Variable =
    /// Running occurrence count of a categorical feature value
    /// (mapAddFeature adds 1. each time the same key is seen again).
    | Strings of float
    /// Three running values for a numeric column; the rest of the file binds
    /// them as (var, mean, n) and feeds them through online_variance_Mean.
    | Number of float * float * float

/// The trained model: class name -> (feature key -> Variable, float).
/// The float is incremented by mapAddClass on every trained row of that class.
type Probability =
    Map<string, (Map<string, Variable> * float)>
/// Component-wise sum of two pairs: (a, b) + (c, d) = (a + c, b + d).
let inline pairAdd (left1, right1) (left2, right2) =
    let leftSum = left1 + left2
    let rightSum = right1 + right2
    leftSum, rightSum
/// Divides the first component of a pair by the second.
/// When the divisor is zero the numerator is returned unchanged instead of
/// producing an infinity/NaN.
let inline div (a, b) =
    if b <> 0. then a / b else a
/// Extracts the count carried by a Strings value; any Number yields 0.
let strOfVar v =
    match v with
    | Strings count -> count
    | Number _ -> 0.
/// Extracts the (var, mean, n) triple carried by a Number value;
/// any Strings value yields (0., 0., 0.).
let numOfVar v =
    match v with
    | Number (var, mean, n) -> var, mean, n
    | Strings _ -> 0., 0., 0.
/// Fold step that records one raw feature string into a class's feature map.
///
/// State is (m, col): the feature map built so far and the current column
/// index. Returns the updated map together with col + 1.
///
/// * If isDouble parses the item, the entry is keyed by the column index
///   (as a string) and holds a Number updated through online_variance_Mean.
/// * Otherwise the entry is keyed by the item text itself and holds a
///   Strings occurrence count.
let mapAddFeature (m, col) item =
    let key, n, isnum =
        match isDouble item with
        | Some parsed -> string col, parsed, true
        | None -> item, 0., false

    let entry =
        match Map.tryFind key m with
        // Key already present: update whatever kind of statistic is stored.
        | Some (Strings seen) -> Strings(seen + 1.)
        | Some (Number (var, mean, count)) -> Number(online_variance_Mean var mean count n)
        // First sighting of this key: seed the statistic.
        | None ->
            if isnum then Number(online_variance_Mean 0. n 1. n)
            else Strings(1.)

    Map.add key entry m, col + 1
/// Stores feature map x under key a in m via mapAddGeneric.
/// The updater replaces the first component with x and adds 1. to the second;
/// (x, 1.) is passed as the final argument — presumably the value inserted
/// when the key is absent (mapAddGeneric is defined elsewhere; confirm).
let mapAddClass m a x =
    let bump (_, seen) = x, seen + 1.
    mapAddGeneric m a bump (x, 1.)
/// Looks up the (feature map, count) entry for dclass through mapGet,
/// supplying (Map.empty, 0.) as the third argument — presumably the fallback
/// for an unknown class (mapGet is defined elsewhere; confirm).
let tryGetClassProb (probabilities: Probability) dclass =
    let fallback = Map.empty, 0.
    mapGet probabilities dclass fallback
/// Folds one comma-separated training row into the model.
///
/// The row is split by splitDataClass into feature strings and a class
/// label; the class's current feature map is updated feature by feature
/// (columns numbered from 0) and written back with mapAddClass.
let trainRow (probabilities: Probability) (datum: string) =
    let data, dclass = splitDataClass datum [|","|]
    // Only the feature map is needed here; the class count is handled by mapAddClass.
    let conditionalFeatures, _ = tryGetClassProb probabilities dclass
    let updatedFeatures, _ = Array.fold mapAddFeature (conditionalFeatures, 0) data
    mapAddClass probabilities dclass updatedFeatures
/// Trains on every row of data in order, starting from model p,
/// and returns the resulting model.
let trainData p data =
    Array.fold trainRow p data
/// The constant pi, taken from System.Math.
let pi = System.Math.PI

/// Normal probability density at x for the given mean and standard deviation:
/// 1 / (stddev * sqrt(2 * pi)) * exp(-0.5 * ((x - mean) / stddev)^2).
let inline gaussian x mean stddev =
    let z = (x - mean) / stddev
    let norm = 1. / (stddev * sqrt (2. * pi))
    norm * exp (-0.5 * z ** 2.0)
/// Computes a (numerator, denominator) pair for one feature given a class
/// entry cp = (feature map, class count); callers pass the pair to div.
///
/// * Numeric feature (isDouble succeeds): looked up by column index. The
///   numerator is a density evaluated at the parsed value and the
///   denominator is 0., so div returns the density unchanged.
/// * Any other feature: looked up by its text. The pair is
///   (0.5 + stored count, 1.0 + class count).
///
/// A key missing from the feature map contributes 0. as the stored value.
let conditionalProbCounts cp featName col =
    let condprob, classTotal = cp

    let key, x, isnum =
        match isDouble featName with
        | Some parsed -> string col, parsed, true
        | None -> featName, 0., false

    let p =
        match Map.tryFind key condprob with
        | None -> 0.
        | Some (Strings count) -> count
        | Some (Number (var, mean, n)) ->
            // Degrees of freedom are derived from the sample count as (ln n)^2.
            let dof = (log n) ** 2.
            // A zero spread falls back to unit variance.
            // NOTE(review): if n can be 1. here, dof is 0. and var / (n - 1.)
            // divides by zero — confirm what online_variance_Mean returns.
            let variance = if var = 0. then 1. else var / (n - 1.)
            let sigma = sqrt variance
            if dof <= 30. then
                // Small samples: Student's t density.
                let student = StudentT(mean, sigma, dof)
                student.Density(x)
            else
                gaussian x mean sigma

    if isnum then p, 0.
    else 0.5 + p, 1.0 + classTotal
/// Conditional probability (or density) of one feature for a class entry:
/// the pair from conditionalProbCounts reduced to a single number by div.
let conditionalProb cp featName col =
    div (conditionalProbCounts cp featName col)
/// Share of the stored class counts that belongs to className:
/// count(className) / sum of all class counts, reduced with div
/// (so an empty model yields 0.).
let classProbability className (p: Probability) =
    let accumulate (hits, total) name (_, count) =
        let hits' = if name = className then count + hits else hits
        hits', count + total
    div (Map.fold accumulate (0.0, 0.0) p)
/// Sums the conditionalProbCounts pairs of feature over every class in the
/// model and reduces the summed pair with div.
///
/// NOTE(review): the running class index is what gets passed as the `col`
/// argument of conditionalProbCounts, so a numeric feature is looked up under
/// the class's position rather than a feature column — confirm this is intended.
let featureProb feature (p: Probability) =
    let step (sums, idx) _ fdist =
        pairAdd (conditionalProbCounts fdist feature idx) sums, idx + 1
    let totals, _ = Map.fold step ((0., 0.), 0) p
    div totals
/// Unnormalised naive Bayes score of dclass for a feature vector:
/// classProbability dclass multiplied by the product of conditionalProb
/// over every feature, with features numbered by their array position.
let naiveBayes dclass (features: string []) (p: Probability) =
    let cp = tryGetClassProb p dclass
    let likelihood =
        features
        |> Array.mapi (fun col feat -> conditionalProb cp feat col)
        |> Array.fold ( * ) 1.
    classProbability dclass p * likelihood
/// Normalised score of dclass: its naiveBayes score divided by the sum of
/// the naiveBayes scores of every class in the model (reduced with div).
let naiveBayesProbability dclass (features: string []) (p: Probability) =
    let step (n, d) curClass _ =
        let score = naiveBayes curClass features p
        if curClass = dclass then score, score + d
        else n, d + score
    div (Map.fold step (0., 0.) p)
/// Scores every class key of p with nb and returns (className, score) pairs.
/// Each pair is prepended while folding over the map, so the list comes out
/// in the reverse of the map's fold order.
let classMap p features nb =
    let prepend scored className _ =
        (className, nb className features p) :: scored
    Map.fold prepend [] p
/// Returns (className, naiveBayesProbability) for every class in the model.
let classifyProbabilities p features =
    naiveBayesProbability |> classMap p features
/// Returns the (className, score) pair with the highest naiveBayes score.
/// List.maxBy raises on an empty list, i.e. when the model has no classes.
let classify p features =
    let scored = classMap p features naiveBayes
    List.maxBy snd scored
////////////////// | |
/// Naive Bayes classifier exposed through the ClassifierAbstract interface.
/// The model lives in a mutable field that starts out as an empty map.
type NaiveBayesClass() =
    inherit ClassifierAbstract()

    let mutable probability = Map.empty

    /// Converts data with dataAsStr and folds it into the current model, so
    /// repeated calls build on what was trained before.
    override this.Train data =
        let rows = dataAsStr data
        probability <- trainData probability rows

    /// Converts the point with strArrFromDPoint and returns the result of
    /// classify against the current model.
    override this.Classify feature =
        let featureStrings = strArrFromDPoint feature
        classify probability featureStrings
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment