Created
November 23, 2011 03:59
-
-
Save lucidjargon/1387847 to your computer and use it in GitHub Desktop.
Simple naive Bayes classifier in F#
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Trained model: maps a class label to a pair of
/// (occurrence count per feature within that class, number of training rows of that class).
type Probability = Map<string, Map<string, float> * float>
/// Inserts or updates key `a` in map `m`.
/// When `a` is already bound, the stored value is replaced by `f` applied to it;
/// otherwise `a` is bound to the initial value `i`.
let mapAddGeneric m a f i =
    let newValue =
        match Map.tryFind a m with
        | Some existing -> f existing
        | None -> i
    Map.add a newValue m
/// Component-wise addition of two pairs.
let inline pairAdd left right =
    (fst left + fst right, snd left + snd right)
/// Divides the first component of a pair by the second.
let inline div (numerator, denominator) = numerator / denominator
/// Counts one more occurrence of feature `a` in the count map `m`
/// (a first occurrence starts the count at 1.0).
let mapAddFeature m a =
    mapAddGeneric m a (fun count -> 1. + count) 1.
/// Stores `x` as the first component of the entry for class `a` and counts one more row:
/// an existing entry keeps its count plus 1.0, a new entry starts at 1.0.
let mapAddClass m a x =
    let bumpCount (_, rowCount) = (x, rowCount + 1.)
    mapAddGeneric m a bumpCount (x, 1.)
/// Looks up `k` in `m`, falling back to the default `i` when the key is absent.
let mapGet m k i =
    match Map.tryFind k m with
    | Some found -> found
    | None -> i
/// Returns the (feature counts, row count) entry for `dclass`,
/// or an empty feature map with a zero count when the class is not in the model.
let tryGetClassProb (probabilities: Probability) dclass =
    let unseenClass = (Map.empty, 0.)
    mapGet probabilities dclass unseenClass
/// Splits a comma-separated row into (features, class label):
/// the last field is the label, every preceding field is a feature.
/// A row without commas yields an empty feature array and the whole string as the label.
let splitDataClass (str: string) =
    let fields = str.Split(',')
    let lastIndex = fields.Length - 1
    Array.sub fields 0 lastIndex, fields.[lastIndex]
/// Folds one training row into the model.
/// `datum` is a comma-separated row whose last field is the class label (see `splitDataClass`).
/// Every feature of the row is counted against that class, and the class row count grows by one.
/// Returns the updated model; the input model is not modified.
let trainRow (probabilities: Probability) (datum: string) =
    let data, dclass = splitDataClass datum
    // Only the feature counts are needed here: mapAddClass increments the row count itself.
    let conditionalFeatures, _ = tryGetClassProb probabilities dclass
    let updatedFeatures = Array.fold mapAddFeature conditionalFeatures data
    mapAddClass probabilities dclass updatedFeatures
/// Trains on every row of `data` in order, starting from the model `p`.
let trainData p data = Array.fold trainRow p data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Smoothed (numerator, denominator) for a feature within one class entry `cp`:
/// 0.5 is added to the feature's count (0.0 if never seen) and 1.0 to the class row count.
let inline conditionalProbCounts cp featName =
    let featureCounts, classCount = cp
    let smoothedCount = 0.5 + mapGet featureCounts featName 0.0
    (smoothedCount, 1.0 + classCount)
/// Smoothed ratio for `featName` within the class entry `cp`
/// (the quotient of the pair produced by `conditionalProbCounts`).
let conditionalProb cp featName =
    div (conditionalProbCounts cp featName)
/// Fraction of all training rows that belong to `className`:
/// the row count of that class divided by the sum of the row counts of every class.
let classProbability className (p: Probability) =
    p
    |> Map.fold
        (fun (matching, total) name (_, rowCount) ->
            let matchingNow = if name = className then rowCount + matching else matching
            (matchingNow, rowCount + total))
        (0.0, 0.0)
    |> div
/// Sums the smoothed (numerator, denominator) counts of `feature` over every class in the
/// model and returns their quotient.
let featureProb feature (p: Probability) =
    p
    |> Map.fold
        (fun totals _ classStats ->
            let counts = conditionalProbCounts classStats feature
            pairAdd counts totals)
        (0., 0.)
    |> div
/// Unnormalised naive Bayes score of `dclass` for the given features:
/// the class probability multiplied by the product of the smoothed per-feature ratios.
let naiveBayes dclass (features: string[]) (p: Probability) =
    let classStats = tryGetClassProb p dclass
    // foldBack multiplies right-to-left, starting from 1.
    let likelihood =
        Array.foldBack (fun feature acc -> conditionalProb classStats feature * acc) features 1.
    let prior = classProbability dclass p
    prior * likelihood
/// Normalised score of `dclass`: its `naiveBayes` score divided by the sum of the
/// `naiveBayes` scores of every class in the model.
let naiveBayesProbability dclass (features: string[]) (p: Probability) =
    p
    |> Map.fold
        (fun (target, total) curClass _ ->
            let score = naiveBayes curClass features p
            if curClass = dclass then (score, score + total) else (target, total + score))
        (0., 0.)
    |> div
/// Applies the scoring function `nb` to every class key of the model `p` and returns
/// (className, score) pairs. Classes are scored in ascending key order, and the resulting
/// list holds them in descending key order.
let classMap p features nb =
    Map.toList p
    |> List.map (fun (className, _) -> (className, nb className features p))
    |> List.rev
/// Pairs every class in the model with its normalised score (`naiveBayesProbability`)
/// for the given features.
let classifyProbabilities p features =
    naiveBayesProbability |> classMap p features
/// Returns the (className, score) pair with the highest unnormalised `naiveBayes` score.
/// NOTE(review): List.maxBy raises on an empty list, so an empty model is not supported.
let classify p features =
    let scored = classMap p features naiveBayes
    List.maxBy (fun (_, score) -> score) scored
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Demo: each row is comma-separated, with the features first and the class label last.
let simpledata = [| "A,C,B"; "A,B"; "C,B"; "C,D"; "A,D" |]

// Model trained on the demo rows, starting from an empty model.
let p = simpledata |> trainData Map.empty

// Unnormalised score of class "B" given the single feature "C".
let p1 = p |> naiveBayes "B" [| "C" |]

// Normalised score of class "B" given the features "C" and "A".
let p2 = p |> naiveBayesProbability "B" [| "C"; "A" |]

// Share of training rows labelled "B".
let c = p |> classProbability "B"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment