Skip to content

Instantly share code, notes, and snippets.

@paralax
Created May 4, 2017 19:55
Show Gist options
  • Save paralax/0801babfba22bfa1a0316863330c6667 to your computer and use it in GitHub Desktop.
Save paralax/0801babfba22bfa1a0316863330c6667 to your computer and use it in GitHub Desktop.
towards implementing the Jensen-Shannon divergence metric, IEEE TRANSACTIONS ON INFORMATION THEORY. VOL. 37, NO. I, JANUARY 1991
let entropy (s) : float =
let p = string(s).ToCharArray()
|> Seq.groupBy (fun x -> x)
|> Seq.map (fun (x,y) -> Seq.length y)
-1.0 * ([ for count in p ->
float(count)/float(String.length(s)) *
System.Math.Log(float(count)/float(String.length(s)), 2.0) ]
|> Seq.sum )
let ngrams (s : string) (n: int) : Map<string,int> =
s.ToCharArray()
|> Array.map string
|> Seq.windowed n
|> Seq.map (String.concat "")
|> Seq.groupBy (fun x -> x)
|> Seq.map (fun (x,y)-> x, Seq.length y)
|> Map.ofSeq
let distribution (s:string) : Map<string,int> =
// yields the distribution of characters over the string s
ngrams s 1
//let Q (i:string) (q : Map<string,int>) : double =
let prob (i:string) (q:Map<string,int>) : double =
// retrieves the frequency of i in q if found or returns .00001
let qq = Map.toList q |> List.map snd |> List.sum |> float
match (Map.tryFind i q) with
| Some(x) -> float(x)/qq
| None -> 0.00001
// https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
let KullbackLeiblerD (p : Map<string,int>) (q : Map<string,int> ) : double =
(Map.toList p |> List.map fst) @ (Map.toList q |> List.map fst)
|> Set.ofList
|> Set.toSeq
|> Seq.map (fun x -> (x, prob x p))
|> Seq.map (fun (x,y) -> y * System.Math.Log(y/(prob x q), 2.0))
|> Seq.sum
// via Divergence Measures Based on the Shannon Entropy, Jianhua Lin
// IEEE TRANSACTIONS ON INFORMATION THEORY. VOL. 37, NO. I, JANUARY 1991, p145
let symbols (a:string) (b:string) : string list =
let letters (s:string) = s.ToCharArray() |> Array.map string |> Array.toList
(letters a) @ (letters b)
|> Set.ofList
|> Set.toList
let I (a:string) (b:string) : double = KullbackLeiblerD (ngrams a 1) (ngrams b 1)
let J (a:string) (b:string) : double =
I a b + I b a
let V (a:string) (b:string) : double =
let p1 = ngrams a 1
let p2 = ngrams b 1
symbols a b
|> List.map (fun x -> System.Math.Abs((prob x p1)-(prob x p2)))
|> List.sum
let K (a:string) (b:string) : double =
let p1 = ngrams a 1
let p2 = ngrams b 1
symbols a b
|> List.map (fun x -> (prob x p1) * System.Math.Log((prob x p1)/((prob x p1) * 0.5 + (prob x p2) * 0.5),2.0))
|> List.sum
let L (a:string) (b:string) : double = K a b + K b a
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment