Last active
October 13, 2016 20:14
-
-
Save mdgriffith/703e57441973b8996dc0fc1448c0a150 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// LDA Model | |
{ | |
"kind": "lda", | |
"signature": "H9fdnifsd89" // A unique identifier for a given lda model. Could be a hash of num topics ++ input data | |
"tokenCache": ["word","in","cache"] // A token id is its index in the list. This might make more sense as a record. | |
"topics": { 1 : {1:431,2:32} // A map where key == token id and value == word count, though it might make sense to normalize the counts before storing the LDA model. | |
, 2 : {1:32,2:0} | |
, "emailBackgroundNoise" : {1:32,2:0} | |
, "twitterBackgroundNoise" : {1:32,2:0} | |
}, | |
} | |
// Topic Model | |
{ | |
lda: "H9fdnifsd89", //signature from the lda model | |
topics: { 1 : 1.0 // Topic id ++ weight | |
, 2 : 0.05 // Background topics don't need to be present | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment