Skip to content

Instantly share code, notes, and snippets.

@lucidjargon
Created November 23, 2011 03:59
Show Gist options
  • Save lucidjargon/1387847 to your computer and use it in GitHub Desktop.
Save lucidjargon/1387847 to your computer and use it in GitHub Desktop.
Simple naive Bayes in F#
/// Trained model: maps a class label to a pair of
/// (per-feature counts for that class, number of training rows seen for that class).
/// Counts are stored as floats so they can be divided directly.
type Probability = Map<string , Map<string,float> * float>
/// Inserts or updates the entry for key `a` in map `m`.
/// If `a` is already present its value is replaced by `f oldValue`;
/// otherwise `a` is bound to the initial value `i`.
/// Returns the new map; `m` itself is not modified.
let mapAddGeneric m a f i =
    // A single tryFind replaces the containsKey + indexer pair (two lookups).
    match Map.tryFind a m with
    | Some current -> Map.add a (f current) m
    | None -> Map.add a i m
/// Component-wise addition of two pairs.
let inline pairAdd (x1, y1) (x2, y2) = (x1 + x2, y1 + y2)
/// Divides the first component of a pair by the second.
let inline div (numerator, denominator) = numerator / denominator
/// Increments the float count stored under key `a` in `m`,
/// starting at 1.0 when the key has not been seen before.
let mapAddFeature m a =
    let increment count = 1. + count
    mapAddGeneric m a increment 1.
/// Stores `x` as the first component of the entry for key `a` and bumps the
/// entry's second component by 1.0 (or starts it at 1.0 for a new key).
let mapAddClass m a x =
    let bumpCount (_, rowCount) = (x, rowCount + 1.)
    mapAddGeneric m a bumpCount (x, 1.)
/// Looks up `k` in `m`, returning the fallback `i` when the key is absent.
let mapGet m k i =
    match Map.tryFind k m with
    | Some value -> value
    | None -> i
/// Fetches the (feature counts, row count) entry for `dclass`,
/// or an empty feature map with a count of 0.0 when the class is not in the model.
let tryGetClassProb (probabilities:Probability) dclass =
    let unseenClass = (Map.empty, 0.)
    mapGet probabilities dclass unseenClass
/// Splits a comma-separated row into (features, class label).
/// The last field is the class label; every field before it is a feature.
/// A row without commas yields an empty feature array.
let splitDataClass (str:string) =
    let fields = str.Split(',')
    let lastIndex = fields.Length - 1
    Array.sub fields 0 lastIndex, fields.[lastIndex]
/// Folds one comma-separated training row into the model.
/// Every feature in the row has its count for the row's class incremented,
/// and the class's row count goes up by one (via mapAddClass).
/// Returns the updated model.
let trainRow (probabilities : Probability) (datum : string) =
    let data, dclass = splitDataClass datum
    // Only the feature counts are needed here; mapAddClass maintains the row count.
    let conditionalFeatures, _ = tryGetClassProb probabilities dclass
    let updatedFeatures = Array.fold mapAddFeature conditionalFeatures data
    mapAddClass probabilities dclass updatedFeatures
/// Trains on every row of `data` in order, starting from the model `p`.
let trainData p data = Array.fold trainRow p data
/// Smoothed (numerator, denominator) for a feature given a class entry `cp`:
/// 0.5 plus the feature's count (0.0 if unseen), and 1.0 plus the class's row count.
let inline conditionalProbCounts cp featName =
    let featureCounts, classCount = cp
    (0.5 + mapGet featureCounts featName 0.0, 1.0 + classCount)
/// Smoothed conditional probability of `featName` given the class entry `cp`.
let conditionalProb cp featName = div (conditionalProbCounts cp featName)
/// Prior of `className`: its row count divided by the row count of all classes.
/// A class missing from the model contributes 0.0 to the numerator.
let classProbability className (p:Probability) =
    let tally (matching, total) name (_, count: float) =
        if name = className then (count + matching, count + total)
        else (matching, count + total)
    Map.fold tally (0.0, 0.0) p |> div
/// Sums the smoothed (numerator, denominator) counts of `feature` over every
/// class in the model and divides the totals.
let featureProb feature (p : Probability) =
    let addCounts (totals: float * float) _ classStats =
        pairAdd (conditionalProbCounts classStats feature) totals
    Map.fold addCounts (0., 0.) p |> div
/// Unnormalised naive Bayes score of `dclass` for the given features:
/// the class prior times the product of each feature's smoothed conditional probability.
let naiveBayes dclass (features : string []) (p:Probability) =
    let classStats = tryGetClassProb p dclass
    // foldBack multiplies from the last feature towards the first.
    let likelihood =
        Array.foldBack (fun feature acc -> conditionalProb classStats feature * acc) features 1.
    classProbability dclass p * likelihood
/// Normalised score of `dclass`: its naive Bayes score divided by the sum of
/// the scores of every class in the model.
let naiveBayesProbability dclass (features : string []) (p:Probability) =
    let accumulate (numerator, denominator) curClass _ =
        let score = naiveBayes curClass features p
        if curClass = dclass then (score, score + denominator)
        else (numerator, denominator + score)
    Map.fold accumulate (0., 0.) p |> div
/// Applies the scoring function `nb` to every class key of `p` and collects
/// (className, score) pairs. Each result is prepended, so the list is in
/// reverse of the map's fold order.
let classMap p features nb =
    let prepend scored className _ = (className, nb className features p) :: scored
    Map.fold prepend [] p
/// Lists every class in the model together with its normalised probability
/// for the given features.
let classifyProbabilities p features =
    naiveBayesProbability |> classMap p features
/// Returns the (className, score) pair with the highest unnormalised naive Bayes score.
/// NOTE(review): List.maxBy is documented to raise on an empty list, so an
/// empty model presumably throws here; confirm callers always train first.
let classify p features =
    classMap p features naiveBayes
    |> List.maxBy (fun (_, score) -> score)
// Small demo: each row lists features followed by the class label in the last field.
let simpledata =
    [| "A,C,B"
       "A,B"
       "C,B"
       "C,D"
       "A,D" |]
// Model trained from scratch on the demo rows.
let p = simpledata |> trainData Map.empty
// Unnormalised score of class "B" given feature "C".
let p1 = p |> naiveBayes "B" [| "C" |]
// Normalised probability of class "B" given features "C" and "A".
let p2 = p |> naiveBayesProbability "B" [| "C"; "A" |]
// Prior probability of class "B".
let c = p |> classProbability "B"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment