Created
May 4, 2017 19:55
-
-
Save paralax/0801babfba22bfa1a0316863330c6667 to your computer and use it in GitHub Desktop.
towards implementing the Jensen-Shannon divergence metric, IEEE TRANSACTIONS ON INFORMATION THEORY. VOL. 37, NO. I, JANUARY 1991
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| let entropy (s) : float = | |
| let p = string(s).ToCharArray() | |
| |> Seq.groupBy (fun x -> x) | |
| |> Seq.map (fun (x,y) -> Seq.length y) | |
| -1.0 * ([ for count in p -> | |
| float(count)/float(String.length(s)) * | |
| System.Math.Log(float(count)/float(String.length(s)), 2.0) ] | |
| |> Seq.sum ) | |
| let ngrams (s : string) (n: int) : Map<string,int> = | |
| s.ToCharArray() | |
| |> Array.map string | |
| |> Seq.windowed n | |
| |> Seq.map (String.concat "") | |
| |> Seq.groupBy (fun x -> x) | |
| |> Seq.map (fun (x,y)-> x, Seq.length y) | |
| |> Map.ofSeq | |
| let distribution (s:string) : Map<string,int> = | |
| // yields the distribution of characters over the string s | |
| ngrams s 1 | |
| //let Q (i:string) (q : Map<string,int>) : double = | |
| let prob (i:string) (q:Map<string,int>) : double = | |
| // retrieves the frequency of i in q if found or returns .00001 | |
| let qq = Map.toList q |> List.map snd |> List.sum |> float | |
| match (Map.tryFind i q) with | |
| | Some(x) -> float(x)/qq | |
| | None -> 0.00001 | |
| // https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence | |
| let KullbackLeiblerD (p : Map<string,int>) (q : Map<string,int> ) : double = | |
| (Map.toList p |> List.map fst) @ (Map.toList q |> List.map fst) | |
| |> Set.ofList | |
| |> Set.toSeq | |
| |> Seq.map (fun x -> (x, prob x p)) | |
| |> Seq.map (fun (x,y) -> y * System.Math.Log(y/(prob x q), 2.0)) | |
| |> Seq.sum | |
| // via Divergence Measures Based on the Shannon Entropy, Jianhua Lin | |
| // IEEE TRANSACTIONS ON INFORMATION THEORY. VOL. 37, NO. I, JANUARY 1991, p145 | |
| let symbols (a:string) (b:string) : string list = | |
| let letters (s:string) = s.ToCharArray() |> Array.map string |> Array.toList | |
| (letters a) @ (letters b) | |
| |> Set.ofList | |
| |> Set.toList | |
| let I (a:string) (b:string) : double = KullbackLeiblerD (ngrams a 1) (ngrams b 1) | |
| let J (a:string) (b:string) : double = | |
| I a b + I b a | |
| let V (a:string) (b:string) : double = | |
| let p1 = ngrams a 1 | |
| let p2 = ngrams b 1 | |
| symbols a b | |
| |> List.map (fun x -> System.Math.Abs((prob x p1)-(prob x p2))) | |
| |> List.sum | |
| let K (a:string) (b:string) : double = | |
| let p1 = ngrams a 1 | |
| let p2 = ngrams b 1 | |
| symbols a b | |
| |> List.map (fun x -> (prob x p1) * System.Math.Log((prob x p1)/((prob x p1) * 0.5 + (prob x p2) * 0.5),2.0)) | |
| |> List.sum | |
| let L (a:string) (b:string) : double = K a b + K b a |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment