Created
December 28, 2013 20:52
-
-
Save luisgerhorst/8164085 to your computer and use it in GitHub Desktop.
Detect CSV delimiter in Objective-C
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Candidate delimiter characters recognised by the detector.
// Kept as macros (not typed constants) because they are used as `case` labels,
// which must be integer constant expressions in C/Objective-C.
#define COMMA ','
#define SEMICOLON ';'
#define COLON ':'
#define TAB '\t'
#define SPACE ' '
// Field-quoting character; referenced by the quote handling in detectDelimiterOfCSVString.
#define DOUBLE_QUOTE '"'
/*
 Returns YES if `character` is one of the candidate delimiters (comma, semicolon,
 colon, tab, space) or 0.

 Callers pass 0 for "no character", i.e. the position before the first or after
 the last character of a line, so a quote at a line edge counts as being next to
 a field boundary.
 */
BOOL delimiterOrNothing(unichar character) {
    switch (character) {
        case COMMA:
        case SEMICOLON:
        case COLON:
        case TAB:
        case SPACE:
        case 0: // "nothing": start or end of line
            return YES;
        default:
            return NO;
    }
}
/*
 Detects the delimiter of CSV text by finding a candidate character that occurs
 the same, non-zero number of times (outside quoted fields) in every non-empty line.

 @param content  The complete CSV text. nil or empty text yields the default.
 @return The detected delimiter. If several candidates qualify, the first one in
         the order comma, semicolon, tab, colon, space is returned. If none
         qualifies, comma is returned.

 Behaviour notes:
 - Lines are split on any newline character (LF, CR, CRLF, ...). Empty lines,
   including the one produced by a trailing newline, are ignored so they do not
   rule out every candidate.
 - Quote state is tracked across a whole line. Inside a quoted field a doubled
   quote ("") is treated as an escaped quote, not as the end of the field.

 Limitations:
 - only supports comma, semicolon, colon, tab and space as delimiter
 - unreliable for input with very few lines
 - quoted fields that contain line breaks are not supported (quote state is reset per line)
 - the whole text must be in memory
 */
unichar detectDelimiterOfCSVString(NSString *content) {
    // Candidates in order of preference when more than one of them qualifies.
    static const unichar candidates[] = {COMMA, SEMICOLON, TAB, COLON, SPACE};
    enum { kCandidateCount = sizeof(candidates) / sizeof(candidates[0]) };
    // Kept local so this function does not depend on a file-level quote constant.
    const unichar quote = '"';

    NSUInteger expectedCounts[kCandidateCount] = {0}; // counts seen in the first non-empty line
    BOOL excluded[kCandidateCount] = {NO};            // YES once a later line disagrees
    BOOL haveReferenceLine = NO;

    NSArray *lines = [content componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];
    for (NSString *line in lines) {
        NSUInteger length = [line length];
        if (length == 0) {
            // Blank line, trailing newline, or the gap between CR and LF.
            continue;
        }

        // Count candidate characters outside quoted fields for this line.
        NSUInteger counts[kCandidateCount] = {0};
        BOOL quoteOpened = NO; // must live outside the character loop to persist
        for (NSUInteger i = 0; i < length; i++) {
            unichar character = [line characterAtIndex:i];

            if (character == quote) {
                if (quoteOpened) {
                    unichar nextCharacter = i + 1 < length ? [line characterAtIndex:i + 1] : 0;
                    if (nextCharacter == quote) {
                        i++; // escaped quote ("") inside a quoted field: skip both
                    } else if (delimiterOrNothing(nextCharacter)) {
                        quoteOpened = NO; // closing quote
                    }
                    // otherwise: stray quote inside a quoted field, keep the field open
                } else {
                    unichar lastCharacter = i > 0 ? [line characterAtIndex:i - 1] : 0;
                    if (delimiterOrNothing(lastCharacter)) {
                        quoteOpened = YES; // opening quote
                    }
                    // otherwise: stray quote inside an unquoted field, ignore it
                }
                continue;
            }

            if (quoteOpened) {
                continue; // delimiters inside quoted fields do not count
            }

            for (NSUInteger c = 0; c < kCandidateCount; c++) {
                if (character == candidates[c]) {
                    counts[c]++;
                    break;
                }
            }
        }

        // Compare with the reference line; a differing count rules the candidate out.
        for (NSUInteger c = 0; c < kCandidateCount; c++) {
            if (!haveReferenceLine) {
                expectedCounts[c] = counts[c];
            } else if (counts[c] != expectedCounts[c]) {
                excluded[c] = YES;
            }
        }
        haveReferenceLine = YES;
    }

    // Choose the first candidate that was consistent and actually occurred.
    for (NSUInteger c = 0; c < kCandidateCount; c++) {
        if (!excluded[c] && expectedCounts[c] > 0) {
            return candidates[c];
        }
    }

    // Return the default if nothing was detected.
    return COMMA;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note: I'm not very experienced in Objective-C. The "count per line" and "detect possible counts" steps could be combined into a single pass.