Skip to content

Instantly share code, notes, and snippets.

@Martin91
Created January 6, 2015 08:31
Show Gist options
  • Select an option

  • Save Martin91/ab313fe591cf9f386aa3 to your computer and use it in GitHub Desktop.

Select an option

Save Martin91/ab313fe591cf9f386aa3 to your computer and use it in GitHub Desktop.
Words counter in Objective-C
//
// main.m
// words-statistics
//
// Created by Martin Hong on 1/5/15.
// Calculate the total count of words in a specified paragraph.
//
// Features:
// 1. 计算文章所有单词加标点符号的数量
// 2. 单独计算文章所有单词的数量
// 3. 单独计算所有标点符号的数量
// 4. 统计每个单词在文中出现的频率
// 5. 对4中的结果排序后输出
// 6. 测试1的结果与2 + 3的结果,验证正确性
//
// 校验程序: http://www.wordcounter.net/
//
// Copyright (c) 2015 Martin Hong. All rights reserved.
//
#import <Foundation/Foundation.h>
#import "words_counter.h"
/// Entry point: runs WordsCounter over a fixed sample paragraph and logs
/// every distinct word with its frequency (in the order returned by
/// -sortKeywords), then the word total, the punctuation total, the combined
/// total and the result of the counter's own consistency check.
///
/// @param argc Unused.
/// @param argv Unused.
/// @return Always 0.
int main(int argc, const char * argv[]) {
    // Scope the autoreleased Foundation temporaries created below to an
    // explicit pool, as is conventional for a command-line tool's main().
    @autoreleasepool {
        WordsCounter *words_counter = [[WordsCounter alloc] init];
        words_counter.content = @"What is Word Counter? Wordcounter is a word count and a character count tool. Simply place your cursor into the box and begin typing. Word counter will automatically count the number of words and characters as you type. You can also copy and paste a document you have already written into the word counter box and it will display the word count and character numbers for that piece of writing. Knowing the number of words or characters in a document can be important. For example, if the author is required to write a minimum or maximum amount of words for an article or paper, word counter can help them know if their article meets these requirements. In addition, word counter automatically shows you the top 10 keywords and keyword density of the article you're writing. This allows you to know what keywords you use most often and what percentage each is used within the article. This can help you from over-using certain words in your writing and allow you to make sure you have the correct keyword distribution you're trying to obtain for any article you write. Word counts can also be important in defining typing and reading speeds. Word counter can help determine both of these. Simply set a timer and start typing and when the time is up, you'll instantly know how many words you have typed for that period of time. Disclaimer: We strive to make our word counter as accurate as possible but we cannot guarantee it will always be so.";
        [words_counter analysis];

        NSMutableDictionary *wordsCount = [words_counter wordsCount];
        NSArray *sortedKeys = [words_counter sortKeywords];
        for (NSString *word in sortedKeys) {
            // -objectForKey: is a plain dictionary lookup; -valueForKey: is
            // the KVC entry point and treats keys starting with "@" specially.
            NSLog(@"%@: %i", word, [[wordsCount objectForKey:word] intValue]);
        }

        NSLog(@"Total words: %i", [words_counter totalWords]);
        NSLog(@"Total punctuations: %i", [words_counter totalSymbols]);
        NSLog(@"Total words or punctuations: %i", [words_counter totalWordsWithSymbols]);
        NSLog(@"The analysis result: %@", [words_counter testCalculationResults] ? @"true" : @"false");
    }
    return 0;
}
2015-01-06 16:28:19.460 words-statistics[17760:887721] you'll: 1
2015-01-06 16:28:19.461 words-statistics[17760:887721] required: 1
2015-01-06 16:28:19.461 words-statistics[17760:887721] meets: 1
2015-01-06 16:28:19.462 words-statistics[17760:887721] author: 1
2015-01-06 16:28:19.462 words-statistics[17760:887721] paste: 1
2015-01-06 16:28:19.462 words-statistics[17760:887721] but: 1
2015-01-06 16:28:19.462 words-statistics[17760:887721] strive: 1
2015-01-06 16:28:19.462 words-statistics[17760:887721] cursor: 1
2015-01-06 16:28:19.462 words-statistics[17760:887721] each: 1
2015-01-06 16:28:19.463 words-statistics[17760:887721] their: 1
2015-01-06 16:28:19.463 words-statistics[17760:887721] type: 1
2015-01-06 16:28:19.463 words-statistics[17760:887721] trying: 1
2015-01-06 16:28:19.463 words-statistics[17760:887721] You: 1
2015-01-06 16:28:19.463 words-statistics[17760:887721] instantly: 1
2015-01-06 16:28:19.463 words-statistics[17760:887721] an: 1
2015-01-06 16:28:19.463 words-statistics[17760:887721] 10: 1
2015-01-06 16:28:19.464 words-statistics[17760:887721] timer: 1
2015-01-06 16:28:19.464 words-statistics[17760:887721] shows: 1
2015-01-06 16:28:19.464 words-statistics[17760:887721] both: 1
2015-01-06 16:28:19.464 words-statistics[17760:887721] Knowing: 1
2015-01-06 16:28:19.478 words-statistics[17760:887721] In: 1
2015-01-06 16:28:19.478 words-statistics[17760:887721] over-using: 1
2015-01-06 16:28:19.478 words-statistics[17760:887721] minimum: 1
2015-01-06 16:28:19.479 words-statistics[17760:887721] so: 1
2015-01-06 16:28:19.479 words-statistics[17760:887721] What: 1
2015-01-06 16:28:19.479 words-statistics[17760:887721] addition: 1
2015-01-06 16:28:19.479 words-statistics[17760:887721] from: 1
2015-01-06 16:28:19.479 words-statistics[17760:887721] any: 1
2015-01-06 16:28:19.480 words-statistics[17760:887721] copy: 1
2015-01-06 16:28:19.480 words-statistics[17760:887721] accurate: 1
2015-01-06 16:28:19.480 words-statistics[17760:887721] written: 1
2015-01-06 16:28:19.480 words-statistics[17760:887721] piece: 1
2015-01-06 16:28:19.481 words-statistics[17760:887721] period: 1
2015-01-06 16:28:19.481 words-statistics[17760:887721] For: 1
2015-01-06 16:28:19.481 words-statistics[17760:887721] set: 1
2015-01-06 16:28:19.481 words-statistics[17760:887721] distribution: 1
2015-01-06 16:28:19.481 words-statistics[17760:887721] amount: 1
2015-01-06 16:28:19.482 words-statistics[17760:887721] obtain: 1
2015-01-06 16:28:19.482 words-statistics[17760:887721] display: 1
2015-01-06 16:28:19.482 words-statistics[17760:887721] determine: 1
2015-01-06 16:28:19.482 words-statistics[17760:887721] sure: 1
2015-01-06 16:28:19.482 words-statistics[17760:887721] use: 1
2015-01-06 16:28:19.483 words-statistics[17760:887721] density: 1
2015-01-06 16:28:19.483 words-statistics[17760:887721] most: 1
2015-01-06 16:28:19.483 words-statistics[17760:887721] how: 1
2015-01-06 16:28:19.483 words-statistics[17760:887721] correct: 1
2015-01-06 16:28:19.483 words-statistics[17760:887721] counts: 1
2015-01-06 16:28:19.484 words-statistics[17760:887721] up: 1
2015-01-06 16:28:19.484 words-statistics[17760:887721] used: 1
2015-01-06 16:28:19.484 words-statistics[17760:887721] already: 1
2015-01-06 16:28:19.484 words-statistics[17760:887721] maximum: 1
2015-01-06 16:28:19.484 words-statistics[17760:887721] we: 1
2015-01-06 16:28:19.484 words-statistics[17760:887721] paper: 1
2015-01-06 16:28:19.485 words-statistics[17760:887721] often: 1
2015-01-06 16:28:19.485 words-statistics[17760:887721] certain: 1
2015-01-06 16:28:19.485 words-statistics[17760:887721] requirements: 1
2015-01-06 16:28:19.485 words-statistics[17760:887721] when: 1
2015-01-06 16:28:19.485 words-statistics[17760:887721] percentage: 1
2015-01-06 16:28:19.486 words-statistics[17760:887721] our: 1
2015-01-06 16:28:19.486 words-statistics[17760:887721] within: 1
2015-01-06 16:28:19.486 words-statistics[17760:887721] guarantee: 1
2015-01-06 16:28:19.486 words-statistics[17760:887721] numbers: 1
2015-01-06 16:28:19.486 words-statistics[17760:887721] speeds: 1
2015-01-06 16:28:19.487 words-statistics[17760:887721] many: 1
2015-01-06 16:28:19.487 words-statistics[17760:887721] them: 1
2015-01-06 16:28:19.487 words-statistics[17760:887721] place: 1
2015-01-06 16:28:19.487 words-statistics[17760:887721] possible: 1
2015-01-06 16:28:19.487 words-statistics[17760:887721] always: 1
2015-01-06 16:28:19.487 words-statistics[17760:887721] We: 1
2015-01-06 16:28:19.488 words-statistics[17760:887721] Counter: 1
2015-01-06 16:28:19.488 words-statistics[17760:887721] allow: 1
2015-01-06 16:28:19.488 words-statistics[17760:887721] Wordcounter: 1
2015-01-06 16:28:19.488 words-statistics[17760:887721] top: 1
2015-01-06 16:28:19.488 words-statistics[17760:887721] begin: 1
2015-01-06 16:28:19.489 words-statistics[17760:887721] tool: 1
2015-01-06 16:28:19.489 words-statistics[17760:887721] defining: 1
2015-01-06 16:28:19.489 words-statistics[17760:887721] Disclaimer: 1
2015-01-06 16:28:19.489 words-statistics[17760:887721] reading: 1
2015-01-06 16:28:19.489 words-statistics[17760:887721] allows: 1
2015-01-06 16:28:19.489 words-statistics[17760:887721] cannot: 1
2015-01-06 16:28:19.490 words-statistics[17760:887721] example: 1
2015-01-06 16:28:19.490 words-statistics[17760:887721] start: 1
2015-01-06 16:28:19.490 words-statistics[17760:887721] typed: 1
2015-01-06 16:28:19.490 words-statistics[17760:887721] important: 2
2015-01-06 16:28:19.490 words-statistics[17760:887721] if: 2
2015-01-06 16:28:19.490 words-statistics[17760:887721] automatically: 2
2015-01-06 16:28:19.491 words-statistics[17760:887721] number: 2
2015-01-06 16:28:19.491 words-statistics[17760:887721] write: 2
2015-01-06 16:28:19.491 words-statistics[17760:887721] make: 2
2015-01-06 16:28:19.491 words-statistics[17760:887721] characters: 2
2015-01-06 16:28:19.491 words-statistics[17760:887721] it: 2
2015-01-06 16:28:19.492 words-statistics[17760:887721] these: 2
2015-01-06 16:28:19.492 words-statistics[17760:887721] also: 2
2015-01-06 16:28:19.492 words-statistics[17760:887721] you're: 2
2015-01-06 16:28:19.492 words-statistics[17760:887721] what: 2
2015-01-06 16:28:19.492 words-statistics[17760:887721] character: 2
2015-01-06 16:28:19.492 words-statistics[17760:887721] keyword: 2
2015-01-06 16:28:19.493 words-statistics[17760:887721] that: 2
2015-01-06 16:28:19.493 words-statistics[17760:887721] document: 2
2015-01-06 16:28:19.493 words-statistics[17760:887721] This: 2
2015-01-06 16:28:19.493 words-statistics[17760:887721] into: 2
2015-01-06 16:28:19.493 words-statistics[17760:887721] your: 2
2015-01-06 16:28:19.493 words-statistics[17760:887721] box: 2
2015-01-06 16:28:19.494 words-statistics[17760:887721] Simply: 2
2015-01-06 16:28:19.494 words-statistics[17760:887721] time: 2
2015-01-06 16:28:19.494 words-statistics[17760:887721] keywords: 2
2015-01-06 16:28:19.494 words-statistics[17760:887721] typing: 3
2015-01-06 16:28:19.494 words-statistics[17760:887721] be: 3
2015-01-06 16:28:19.495 words-statistics[17760:887721] in: 3
2015-01-06 16:28:19.495 words-statistics[17760:887721] writing: 3
2015-01-06 16:28:19.495 words-statistics[17760:887721] as: 3
2015-01-06 16:28:19.495 words-statistics[17760:887721] know: 3
2015-01-06 16:28:19.495 words-statistics[17760:887721] have: 3
2015-01-06 16:28:19.495 words-statistics[17760:887721] will: 3
2015-01-06 16:28:19.496 words-statistics[17760:887721] or: 3
2015-01-06 16:28:19.496 words-statistics[17760:887721] help: 3
2015-01-06 16:28:19.496 words-statistics[17760:887721] count: 4
2015-01-06 16:28:19.496 words-statistics[17760:887721] Word: 4
2015-01-06 16:28:19.496 words-statistics[17760:887721] for: 4
2015-01-06 16:28:19.496 words-statistics[17760:887721] is: 5
2015-01-06 16:28:19.497 words-statistics[17760:887721] to: 5
2015-01-06 16:28:19.497 words-statistics[17760:887721] article: 5
2015-01-06 16:28:19.497 words-statistics[17760:887721] words: 5
2015-01-06 16:28:19.497 words-statistics[17760:887721] a: 6
2015-01-06 16:28:19.497 words-statistics[17760:887721] word: 6
2015-01-06 16:28:19.498 words-statistics[17760:887721] counter: 6
2015-01-06 16:28:19.498 words-statistics[17760:887721] can: 6
2015-01-06 16:28:19.498 words-statistics[17760:887721] of: 7
2015-01-06 16:28:19.498 words-statistics[17760:887721] you: 10
2015-01-06 16:28:19.498 words-statistics[17760:887721] the: 11
2015-01-06 16:28:19.498 words-statistics[17760:887721] and: 12
2015-01-06 16:28:19.499 words-statistics[17760:887721] Total words: 255
2015-01-06 16:28:19.499 words-statistics[17760:887721] Total punctuations: 19
2015-01-06 16:28:19.499 words-statistics[17760:887721] Total words or punctuations: 274
2015-01-06 16:28:19.499 words-statistics[17760:887721] The analysis result: true
Program ended with exit code: 0
//
// words_counter.h
// words-statistics
//
// Created by Martin Hong on 1/5/15.
// Copyright (c) 2015 Martin Hong. All rights reserved.
//
#ifndef words_statistics_words_counter_h
#define words_statistics_words_counter_h
/// Counts the words and punctuation marks in a piece of text.
///
/// Usage: assign `content`, call -analysis, then read the totals,
/// -wordsCount or -sortKeywords. The results describe the text that was in
/// `content` when -analysis last ran, so call -analysis again after
/// assigning new content.
@interface WordsCounter : NSObject

/// The text to analyse. Declared `copy` so that a caller who passes an
/// NSMutableString cannot change the text after it has been assigned.
@property (nonatomic, copy) NSString *content;

/// Regex matches for the words in `content`; set by -analysis.
@property (nonatomic, copy) NSArray *words;

/// Regex matches for the punctuation characters in `content`; set by -analysis.
@property (nonatomic, copy) NSArray *punctuations;

/// Regex matches for words and punctuation characters together; set by -analysis.
@property (nonatomic, copy) NSArray *wordsOrPunctuations;

/// @return The number of entries in `wordsOrPunctuations`.
- (int)totalWordsWithSymbols;

/// @return The number of entries in `words`.
- (int)totalWords;

/// @return The number of entries in `punctuations`.
- (int)totalSymbols;

/// @return A dictionary mapping each distinct word (case-sensitive) to an
///         NSNumber holding how many times it was matched.
- (NSMutableDictionary *)wordsCount;

/// Scans `content` and refreshes `words`, `punctuations` and
/// `wordsOrPunctuations`.
- (void)analysis;

/// @return The distinct words ordered by ascending frequency.
- (NSArray *)sortKeywords;

/// @return YES when the word total plus the punctuation total equals the
///         combined total.
- (BOOL)testCalculationResults;

@end
#endif
//
// words_counter.m
// words-statistics
//
// Created by Martin Hong on 1/5/15.
// Copyright (c) 2015 Martin Hong. All rights reserved.
//
#import <Foundation/Foundation.h>
#import "words_counter.h"
@implementation WordsCounter

/// Compiles one of this class's constant patterns.
/// The patterns are literals in this file, so a failure is a programming
/// error: it is asserted rather than reported to the caller.
static NSRegularExpression *WordsCounterCompilePattern(NSString *pattern) {
    NSError *error = nil;
    NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:pattern
                                                                           options:0
                                                                             error:&error];
    NSCAssert(regex != nil, @"Invalid pattern %@: %@", pattern, error);
    return regex;
}

/// @return The number of word-or-punctuation matches from the last
///         -analysis, converted to int.
- (int)totalWordsWithSymbols {
    return (int)[self.wordsOrPunctuations count];
}

/// @return The number of word matches from the last -analysis, converted to int.
- (int)totalWords {
    return (int)[self.words count];
}

/// @return The number of punctuation matches from the last -analysis,
///         converted to int.
- (int)totalSymbols {
    return (int)[self.punctuations count];
}

/// Scans `content` and stores the regex matches for words, for punctuation
/// and for both together.
///
/// A word is a run of word characters, apostrophes and hyphens, which covers
/// 1. hello; 2. something-appended; 3. You're. As a consequence an apostrophe
/// or hyphen standing on its own is also counted as a word.
///
/// When `content` is nil the three result arrays are reset to nil, so that
/// results from previously analysed content are not reported.
- (void)analysis {
    NSString *text = self.content;
    if (text == nil) {
        self.wordsOrPunctuations = nil;
        self.words = nil;
        self.punctuations = nil;
        return;
    }

    // The second alternative of the combined pattern can only match
    // characters outside ['\w-]: every apostrophe and hyphen is consumed by
    // the first alternative. The combined matches are therefore exactly the
    // word matches plus the punctuation matches.
    NSRegularExpression *wordsOrPunctuationsDetection = WordsCounterCompilePattern(@"['\\w-]+|[^\\s\\w]");
    NSRegularExpression *wordsDetection = WordsCounterCompilePattern(@"['\\w-]+");
    NSRegularExpression *punctuationsDetection = WordsCounterCompilePattern(@"[^\\s\\w'-]");

    // Run each pattern over the whole text.
    NSRange wholeText = NSMakeRange(0, [text length]);
    self.wordsOrPunctuations = [wordsOrPunctuationsDetection matchesInString:text
                                                                     options:0
                                                                       range:wholeText];
    self.words = [wordsDetection matchesInString:text
                                         options:0
                                           range:wholeText];
    self.punctuations = [punctuationsDetection matchesInString:text
                                                       options:0
                                                         range:wholeText];
}

/// Builds the frequency table for the words found by the last -analysis.
///
/// @return A new dictionary mapping each distinct word (case-sensitive, so
///         "Word" and "word" are separate keys) to an NSNumber count. Empty
///         when nothing has been analysed.
- (NSMutableDictionary *)wordsCount {
    NSString *text = self.content;
    NSUInteger textLength = [text length];
    NSMutableDictionary *counts = [NSMutableDictionary dictionaryWithCapacity:30];

    for (NSTextCheckingResult *match in self.words) {
        NSRange range = [match range];
        // The stored ranges refer to the text that was analysed. If content
        // has since been set to nil or to a shorter string, skip ranges that
        // no longer fit instead of raising NSRangeException.
        if (NSMaxRange(range) > textLength) {
            continue;
        }
        NSString *word = [text substringWithRange:range];
        // A missing entry yields nil, and [nil intValue] is 0, so the first
        // occurrence is stored as 1.
        int seen = [[counts objectForKey:word] intValue];
        [counts setObject:[NSNumber numberWithInt:(seen + 1)] forKey:word];
    }
    return counts;
}

/// @return The distinct words ordered by ascending frequency. The relative
///         order of words with equal frequency is not specified here.
- (NSArray *)sortKeywords {
    return [[self wordsCount] keysSortedByValueUsingComparator:^NSComparisonResult(id obj1, id obj2) {
        return [(NSNumber *)obj1 compare:(NSNumber *)obj2];
    }];
}

/// Consistency check on the last analysis.
///
/// @return YES when the word total plus the punctuation total equals the
///         combined total.
- (BOOL)testCalculationResults {
    return [self totalSymbols] + [self totalWords] == [self totalWordsWithSymbols];
}

@end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment