Created
April 16, 2012 20:20
-
-
Save ianp/2401251 to your computer and use it in GitHub Desktop.
Word count functions and main wrapper.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import <Foundation/Foundation.h> | |
/// Counts whitespace-delimited runs of characters in `string` using NSScanner.
///
/// Every maximal run of non-whitespace characters counts as one word, so
/// free-standing punctuation is counted too.
///
/// @param string The text to scan.
/// @return The number of non-whitespace runs found.
NSUInteger scannerWordCount(NSString* string)
{
    NSCharacterSet* separators = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSScanner* wordScanner = [NSScanner scannerWithString:string];

    NSUInteger count = 0;
    for (;;) {
        // The scanner skips leading whitespace by default, so each successful
        // scan consumes exactly one run of non-separator characters.
        BOOL consumedWord = [wordScanner scanUpToCharactersFromSet:separators intoString:nil];
        if (!consumedWord) {
            break;
        }
        count += 1;
    }
    return count;
}
/// Counts the matches of the regular expression `\w+` in `string`.
///
/// @param string The text to search.
/// @return The number of non-overlapping runs of word characters.
NSUInteger regexWordCount(NSString* string)
{
    // The pattern is a compile-time constant, so the error out-parameter is
    // deliberately not inspected.
    NSRegularExpression* wordPattern =
        [NSRegularExpression regularExpressionWithPattern:@"\\w+"
                                                  options:0
                                                    error:nil];
    NSRange entireString = NSMakeRange(0, [string length]);
    NSUInteger matches = [wordPattern numberOfMatchesInString:string
                                                      options:0
                                                        range:entireString];
    return matches;
}
/// Counts the words in `string` by splitting it on single space characters.
///
/// Splitting yields an empty component for every leading, trailing or repeated
/// space (and a single empty component for an empty string). Those are not
/// words, so only non-empty components are counted.
///
/// @param string The text to split. nil yields 0.
/// @return The number of non-empty, space-separated components.
NSUInteger componentsByStringWordCount(NSString* string)
{
    NSUInteger words = 0;
    for (NSString* component in [string componentsSeparatedByString:@" "]) {
        // Skip the empty strings produced between adjacent separators.
        if ([component length] > 0) {
            ++words;
        }
    }
    return words;
}
/// Counts the words in `string` by splitting it on whitespace and newline
/// characters.
///
/// Splitting yields an empty component for every leading, trailing or repeated
/// separator (and a single empty component for an empty string). Those are not
/// words, so only non-empty components are counted.
///
/// @param string The text to split. nil yields 0.
/// @return The number of non-empty, whitespace-separated components.
NSUInteger componentsByCharsWordCount(NSString* string)
{
    NSCharacterSet* ws = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSUInteger words = 0;
    for (NSString* component in [string componentsSeparatedByCharactersInSet:ws]) {
        // Skip the empty strings produced between adjacent separators.
        if ([component length] > 0) {
            ++words;
        }
    }
    return words;
}
/// Counts the tokens in `string` that linguistic tagging classifies as words.
///
/// The string is enumerated with the token-type tag scheme and only tokens
/// tagged NSLinguisticTagWord are counted; every other token type is ignored.
///
/// @param string The text to analyse. nil yields 0.
/// @return The number of word tokens.
NSUInteger taggerWordCount(NSString* string)
{
    __block NSUInteger words = 0;
    // NSString's convenience enumerator manages its own tagger, so this
    // function owns no alloc/init'd NSLinguisticTagger that would need a
    // matching release under manual reference counting.
    [string enumerateLinguisticTagsInRange:NSMakeRange(0, [string length])
                                    scheme:NSLinguisticTagSchemeTokenType
                                   options:0
                               orthography:nil
                                usingBlock:^(NSString* tag, NSRange token, NSRange sentence, BOOL* stop) {
        // Tags are NSStrings, so compare with isEqualToString: rather than the
        // scripting-support method isEqualTo:.
        if ([tag isEqualToString:NSLinguisticTagWord]) ++words;
    }];
    return words;
}
/// Runs each word-count strategy against one sample sentence and prints the
/// results to stdout, one line per strategy.
int main(int argc, char *argv[]) {
    // An autorelease pool block drains on every exit path from the scope.
    @autoreleasepool {
        NSString* string = @"Peter piper picked a peck of pickled pepper . No — really — he did!";
        // NSUInteger is `unsigned long` only on 64-bit targets; the casts keep
        // the %lu conversion correct on 32-bit ones as well.
        printf("scanner (original)\t: %lu\n", (unsigned long)scannerWordCount(string));
        printf("regular expression\t: %lu\n", (unsigned long)regexWordCount(string));
        printf("components (string)\t: %lu\n", (unsigned long)componentsByStringWordCount(string));
        printf("components (chars)\t: %lu\n", (unsigned long)componentsByCharsWordCount(string));
        printf("linguistic tagger\t: %lu\n", (unsigned long)taggerWordCount(string));
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment