Created
August 7, 2013 05:10
-
-
Save kleneway/6171372 to your computer and use it in GitHub Desktop.
Using NSLinguisticTagger to stem Haiku Deck tags
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Stems tags using the built-in iOS linguistics support (NSLinguisticTagger with the
/// lemma tag scheme).
///
/// The tags are joined with single spaces into one string, which the tagger then
/// tokenizes. The result therefore holds one entry per token the tagger reports
/// (whitespace and punctuation are omitted): the token's lemma when the tagger provides
/// one, otherwise the token's text exactly as it appeared in the joined input.
///
/// @param originalTags The tags to stem. May be nil or empty.
/// @return A newly created mutable array of stemmed tokens; empty when there is nothing
///         to stem.
+ (NSMutableArray *)stemTags:(NSMutableArray*)originalTags {
    NSMutableArray *stemmedTags = [[NSMutableArray alloc] init];

    // Messaging nil yields 0, so this covers both a nil and an empty array.
    if (originalTags.count == 0) {
        return stemmedTags;
    }

    NSLinguisticTaggerOptions options =
        (NSLinguisticTaggerOmitWhitespace | NSLinguisticTaggerOmitPunctuation);
    NSLinguisticTagger *tagger = [[NSLinguisticTagger alloc]
        initWithTagSchemes:[NSArray arrayWithObjects:NSLinguisticTagSchemeLemma, nil]
                   options:options];

    // Convert the tags to a single string. Keep a reference to it: its length bounds the
    // enumeration and its contents supply the fallback text for tokens without a lemma.
    NSString *allTags = [originalTags componentsJoinedByString:@" "];
    [tagger setString:allTags];

    // Loop through each token and stem it, documentation found here:
    // http://developer.apple.com/library/ios/#documentation/cocoa/reference/NSLinguisticTagger_Class/Reference/Reference.html
    [tagger enumerateTagsInRange:NSMakeRange(0, allTags.length)
                          scheme:NSLinguisticTagSchemeLemma
                         options:options
                      usingBlock:^(NSString *tag, NSRange tokenRange, NSRange sentenceRange, BOOL *stop) {
        if (tag.length > 0) {
            // Token has been stemmed, add the stemmed version to the list.
            [stemmedTags addObject:tag];
        } else {
            // Token was not stemmed, add the original text. It is read from tokenRange
            // rather than looked up by a running index into originalTags, because a tag
            // made of several tokens would push such an index out of step with the tags.
            [stemmedTags addObject:[allTags substringWithRange:tokenRange]];
        }
    }];

    return stemmedTags;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment