Created
April 18, 2012 12:45
-
-
Save anonymous/2413356 to your computer and use it in GitHub Desktop.
Linguistic tagger examples.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import <Foundation/Foundation.h> | |
/**
 * Runs an NSLinguisticTagger over `string` using the tag scheme `scheme`
 * and logs what it finds.
 *
 * For every token reported by the tagger this logs the token's
 * location/length, the location/length of the enclosing sentence, and the
 * tag. After enumeration it logs the number of distinct sentences seen
 * and the set of distinct tags encountered.
 *
 * @param string The text to analyse.
 * @param scheme One of the NSLinguisticTagScheme* constants. It must be among
 *               the schemes available for English, because the tagger is
 *               initialised with exactly that list.
 */
void taggerCounter(NSString* string, NSString* scheme)
{
    NSLog(@"=== %@", scheme);

    NSArray* schemes = [NSLinguisticTagger availableTagSchemesForLanguage:@"en"];

    // The tagger can only be queried for schemes it was initialised with,
    // so bail out early with a readable message instead of failing inside
    // the enumeration call.
    if (![schemes containsObject:scheme]) {
        NSLog(@"tag scheme %@ is not available for language \"en\"", scheme);
        return;
    }

    NSLinguisticTagger* tagger = [[NSLinguisticTagger alloc] initWithTagSchemes:schemes
                                                                        options:0];
    [tagger setString:string];

    __block NSUInteger sentences = 0;
    __block NSUInteger current_sentence = 0;

    // Autoreleased convenience constructor: no ownership to balance, so this
    // does not leak under manual reference counting. The block only sends
    // messages to the set (never reassigns it), so __block is not needed.
    NSMutableSet* tags = [NSMutableSet set];

    [tagger enumerateTagsInRange:NSMakeRange(0, [string length])
                          scheme:scheme
                         options:0
                      usingBlock:^(NSString* tag, NSRange token, NSRange sentence, BOOL *stop) {
        if (tag) [tags addObject:tag];

        // A new sentence starts on the very first token, and afterwards
        // whenever the enclosing sentence's start location changes.
        if (!sentences || current_sentence != sentence.location) ++sentences;
        current_sentence = sentence.location;

        // NSUInteger is 64 bits wide on LP64 platforms; cast to
        // unsigned long and print with %lu so the output is correct on
        // both 32- and 64-bit builds.
        NSLog(@"token %lu-%lu sentence %lu-%lu %@",
              (unsigned long)token.location, (unsigned long)token.length,
              (unsigned long)sentence.location, (unsigned long)sentence.length,
              tag);
    }];

    NSLog(@"%lu sentences, tag list: %@", (unsigned long)sentences, tags);

    // Balance the alloc/init above when built with manual reference counting;
    // under ARC the compiler releases the tagger for us.
#if !__has_feature(objc_arc)
    [tagger release];
#endif
}
/**
 * Entry point: runs taggerCounter() over a short multilingual sample text
 * with one linguistic tag scheme. Swap the commented-out calls below to try
 * the other schemes.
 */
int main(int argc, char *argv[]) {
    // @autoreleasepool replaces the NSAutoreleasePool alloc/release pair and
    // compiles under both manual reference counting and ARC.
    @autoreleasepool {
        NSString* coffee = @"What I want - is a proper cup ’o coffee,"
                           @" Made in a proper copper coffee pot."
                           @" Ik kan van mijn punt."
                           @" Аз не мога да ми."
                           @" Mais je veux du café o une tasse à partir d'un pot de café bon.";

        // uncomment as desired...
        //taggerCounter(coffee, NSLinguisticTagSchemeTokenType);
        //taggerCounter(coffee, NSLinguisticTagSchemeLexicalClass);
        //taggerCounter(coffee, NSLinguisticTagSchemeNameType);
        //taggerCounter(coffee, NSLinguisticTagSchemeNameTypeOrLexicalClass);
        //taggerCounter(coffee, NSLinguisticTagSchemeLemma);
        taggerCounter(coffee, NSLinguisticTagSchemeLanguage);
        //taggerCounter(coffee, NSLinguisticTagSchemeScript);
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment