Last active
May 9, 2017 12:34
-
-
Save amn41/8e2d93b8b9d800ea633b1e33b5e9b5a1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys

from mitie import text_categorizer_trainer, tokenize

# Train a MITIE text categorizer from labelled example sentences, save it to
# disk, and use it to predict the label of a new sentence.

# The trainer is constructed from the path to a MITIE word feature extractor
# file; replace the placeholder path with a real one before running.
trainer = text_categorizer_trainer("/path/to/total_word_feature_extractor.dat")

# Training data, keyed by label. Each value is a mapping whose "examples"
# entry lists the raw training sentences for that label.
data = {}  # same as before - omitted for brevity

# Iterate `data` itself: the original looped over an undefined
# `training_examples` name, which raised NameError.
for label, entry in data.items():
    for text in entry["examples"]:
        tokens = tokenize(text)
        trainer.add_labeled_text(tokens, label)

trainer.num_threads = 4
cat = trainer.train()
cat.save_to_disk("my_text_categorizer.dat")

# we can then use the categorizer to predict on new text
tokens = tokenize("somewhere that serves chinese food")
# Calling the categorizer returns a pair; only the label is kept here
# (the second element is presumably a confidence score - TODO confirm).
predicted_label, _ = cat(tokens)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Allan, on line 8 you want `data.keys()` instead of `training_examples.keys()`.