Skip to content

Instantly share code, notes, and snippets.

Created April 18, 2012 12:45
Show Gist options
  • Save anonymous/2413356 to your computer and use it in GitHub Desktop.
Save anonymous/2413356 to your computer and use it in GitHub Desktop.
Linguistic tagger examples.
#import <Foundation/Foundation.h>
#ifndef __has_feature
#define __has_feature(x) 0  // Lets the ARC check below compile on non-clang compilers.
#endif

/**
 * Enumerates the linguistic tags of a string for one tag scheme and logs them.
 *
 * For every token reported by the tagger this logs the token range, the range
 * of the enclosing sentence and the tag. After the enumeration it logs the
 * number of distinct sentences seen and the set of distinct tags encountered.
 *
 * @param string The text to analyse.
 * @param scheme The tag scheme to enumerate (one of the NSLinguisticTagScheme*
 *               constants). The tagger is created with the schemes available
 *               for English, so the scheme is expected to be among those.
 */
void taggerCounter(NSString* string, NSString* scheme)
{
    NSLog(@"=== %@", scheme);

    NSArray* schemes = [NSLinguisticTagger availableTagSchemesForLanguage:@"en"];
    NSLinguisticTagger* tagger = [[NSLinguisticTagger alloc] initWithTagSchemes:schemes
                                                                        options:0];
    [tagger setString:string];

    __block NSUInteger sentences = 0;
    __block NSUInteger current_sentence = 0;
    // Only the set's contents are mutated inside the block, never the variable
    // itself, so __block is not needed. The convenience constructor returns an
    // object this function does not own, so there is nothing to release.
    NSMutableSet* tags = [NSMutableSet set];

    [tagger enumerateTagsInRange:NSMakeRange(0, [string length])
                          scheme:scheme
                         options:0
                      usingBlock:^(NSString* tag, NSRange token, NSRange sentence, BOOL *stop) {
        // The tag may be nil for a token; a set cannot hold nil.
        if (tag) [tags addObject:tag];

        // A new sentence starts on the first token, or whenever the location
        // of the enclosing sentence differs from the previous token's.
        if (!sentences || current_sentence != sentence.location) ++sentences;
        current_sentence = sentence.location;

        // NSUInteger is 64 bits wide on 64-bit platforms; cast to unsigned long
        // and print with %lu so the format matches the argument width.
        NSLog(@"token %lu-%lu sentence %lu-%lu %@",
              (unsigned long)token.location, (unsigned long)token.length,
              (unsigned long)sentence.location, (unsigned long)sentence.length,
              tag);
    }];

    NSLog(@"%lu sentences, tag list: %@", (unsigned long)sentences, tags);

#if !__has_feature(objc_arc)
    // Under manual reference counting the alloc/init above gave us ownership.
    [tagger release];
#endif
}
/**
 * Entry point: runs taggerCounter over a fixed sample string with the tag
 * scheme(s) left uncommented below.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return Always 0.
 */
int main(int argc, char *argv[]) {
    // @autoreleasepool drains the pool when the scope is left and, unlike a
    // manually managed NSAutoreleasePool, also compiles under ARC.
    @autoreleasepool {
        // Sample text mixing several languages and scripts (Latin and Cyrillic).
        NSString* coffee = @"What I want - is a proper cup ’o coffee,"
                           @" Made in a proper copper coffee pot."
                           @" Ik kan van mijn punt."
                           @" Аз не мога да ми."
                           @" Mais je veux du café o une tasse à partir d'un pot de café bon.";

        // uncomment as desired...
        //taggerCounter(coffee, NSLinguisticTagSchemeTokenType);
        //taggerCounter(coffee, NSLinguisticTagSchemeLexicalClass);
        //taggerCounter(coffee, NSLinguisticTagSchemeNameType);
        //taggerCounter(coffee, NSLinguisticTagSchemeNameTypeOrLexicalClass);
        //taggerCounter(coffee, NSLinguisticTagSchemeLemma);
        taggerCounter(coffee, NSLinguisticTagSchemeLanguage);
        //taggerCounter(coffee, NSLinguisticTagSchemeScript);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment