Last active
October 29, 2017 14:00
-
-
Save darcwader/413658f6dd5fcd4a6a2ebfb92870067e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Looks up the IDF weight stored for `word`.
///
/// - Parameter word: The term to look up in `vocabulary`.
/// - Returns: The element of `idf` at the position `vocabulary` maps `word` to,
///   or `0.0` when `word` has no entry in `vocabulary`.
func idf(word:String) -> Double {
    // Unknown words carry no weight.
    guard let index = self.vocabulary[word] else {
        return 0.0
    }
    return self.idf[index]
}
/// Builds a sparse TF-IDF vector for `sentence`.
///
/// Each entry of the count vector returned by `countVector(sentence:)` is
/// turned into a weight: the entry's count divided by the number of entries in
/// the count vector, multiplied by `idf` at the same index. When `norm` is
/// `true`, the weights are then scaled to unit L2 length.
///
/// - Parameter sentence: The text to vectorize.
/// - Returns: A dictionary from vocabulary index to TF-IDF weight. Empty when
///   `countVector(sentence:)` returns `nil`.
func tfidf(sentence:String) -> [Int:Double] {
    guard let counts = countVector(sentence: sentence) else {
        return [:]
    }

    // NOTE(review): the term-frequency denominator is the number of entries in
    // the count vector (distinct terms), not the total token count — kept as in
    // the original; confirm this is intended.
    let entryCount = Double(counts.count)

    var weights = [Int:Double]()
    for (index, count) in counts {
        let termFrequency = Double(count) / entryCount
        weights[index] = termFrequency * self.idf[index]
    }

    // `weights` is TF-IDF at this point, but not normalized.
    guard self.norm else {
        return weights
    }

    // L2 norm: square root of the sum of squared weights.
    let magnitude = sqrt(weights.values.reduce(0) { $0 + $1 * $1 })

    // A zero norm means every weight is zero (or there are none); dividing
    // would turn them all into NaN, so return the vector unscaled instead.
    guard magnitude > 0 else {
        return weights
    }

    var normalized = [Int:Double]()
    for (index, weight) in weights {
        normalized[index] = weight / magnitude
    }
    return normalized
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment