Created
January 22, 2016 01:50
-
-
Save sudipto80/e265f1eed458b48cecc3 to your computer and use it in GitHub Desktop.
TF-IDF (term frequency–inverse document frequency)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// TF-IDF demo: scores how strongly `word` characterises each sample sentence.
// Term frequency (tf) is the raw occurrence count of the word in a sentence;
// inverse document frequency (idf) is
// log10 (number of documents / number of documents containing the word).

let sentence1 = "this is a a sample"
let sentence2 = "this is another another example example example"
let word = "example"

// The corpus. Every corpus-level statistic below is derived from this array.
let docs = [| sentence1; sentence2 |]

// Derived from the corpus rather than hard-coded, so it cannot drift out of
// sync when documents are added to or removed from `docs`.
let numberOfDocs = float (Array.length docs)

// Number of space-separated tokens in `text` that are exactly equal to `term`.
let countOccurrences (term: string) (text: string) =
    text.Split ' '
    |> Array.filter (fun token -> token = term)
    |> Array.length

// Raw term counts of `word` in each sentence.
let tf1 = countOccurrences word sentence1
let tf2 = countOccurrences word sentence2

// Number of documents in the corpus that contain `word` at least once.
let foundIn =
    docs
    |> Array.filter (fun doc -> countOccurrences word doc <> 0)
    |> Array.length

// A word found in no document would give log10 (n / 0.) = infinity, and the
// scores would become 0 * infinity = NaN; define idf as 0 in that case.
let idf =
    if foundIn = 0 then 0.0
    else Operators.log10 (numberOfDocs / float foundIn)

// TF-IDF score of `word` for each sentence.
let pr1 = float tf1 * idf
let pr2 = float tf2 * idf
printfn "%f %f" pr1 pr2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment