Skip to content

Instantly share code, notes, and snippets.

@luisgerhorst
Created December 28, 2013 20:52
Show Gist options
  • Save luisgerhorst/8164085 to your computer and use it in GitHub Desktop.
Save luisgerhorst/8164085 to your computer and use it in GitHub Desktop.
Detect CSV delimiter in Objective-C
// Candidate delimiter characters, as character constants comparable with unichar values.
#define COMMA ','
#define SEMICOLON ';'
#define COLON ':'
#define TAB '\t'
#define SPACE ' '
/*
 Returns YES when the given character is one of the candidate delimiters
 (comma, semicolon, colon, tab, space) or the value 0, and NO for anything else.
 */
BOOL delimiterOrNothing(unichar character) {
    switch (character) {
        case COMMA:
        case SEMICOLON:
        case COLON:
        case TAB:
        case SPACE:
        case 0:
            return YES;
        default:
            return NO;
    }
}
/*
 Detects the delimiter of a CSV string by finding a candidate character whose
 number of occurrences outside double-quoted fields is the same, and non-zero,
 on every non-empty line.

 content: the complete CSV text. Lines are split on "\n"; a single trailing
          "\r" (CRLF line ending) is removed from each line, and empty lines
          are ignored so that a trailing newline does not defeat detection.

 Returns COMMA, SEMICOLON, COLON, TAB or SPACE. If several candidates qualify,
 the first one in that order is returned. If nothing is detected (including
 nil or empty content), comma is returned.

 Criticism:
 - only supports comma, semicolon, colon, tab and space as delimiter
 - works badly with small files
 - not fast
 - needs the whole file in memory
 - quote state is tracked per line only, so quoted fields spanning several
   lines are not handled
 */
unichar detectDelimiterOfCSVString(NSString *content) {
#define COMMA_STR @","
#define SEMICOLON_STR @";"
#define COLON_STR @":"
#define TAB_STR @"\t"
#define SPACE_STR @" "
#define ZERO_NMB @0
// Guarded so that an existing definition elsewhere in the project is respected.
#ifndef DOUBLE_QUOTE
#define DOUBLE_QUOTE '"'
#endif
#define INCREMENT_COUNT(_key) counts[_key] = [NSNumber numberWithUnsignedInteger:[counts[_key] unsignedIntegerValue] + 1]

    // Fixed priority order; also used for the final choice so the result is deterministic.
    NSArray *delimiterKeys = @[COMMA_STR, SEMICOLON_STR, COLON_STR, TAB_STR, SPACE_STR];

    NSArray *lines = [content componentsSeparatedByString:@"\n"];

    /*
     Counts that are still consistent across all lines seen so far, keyed by delimiter string.
     nil until the first non-empty line was processed; a delimiter is removed as soon as
     one line disagrees with the count of the first line.
     */
    NSMutableDictionary *possibleCounts = nil;

    for (NSString *rawLine in lines) {
        // Drop the "\r" left over from CRLF line endings; otherwise a closing quote at the
        // end of a line would not be followed by "nothing" and would never be recognised.
        NSString *line = rawLine;
        if ([line hasSuffix:@"\r"]) {
            line = [line substringToIndex:[line length] - 1];
        }

        NSUInteger length = [line length];
        if (length == 0) {
            // Blank lines (typically the one after a trailing newline) carry no information
            // and would otherwise exclude every delimiter.
            continue;
        }

        // count per line:
        NSMutableDictionary *counts = [NSMutableDictionary dictionaryWithDictionary:@{COMMA_STR: ZERO_NMB,
                                                                                      SEMICOLON_STR: ZERO_NMB,
                                                                                      COLON_STR: ZERO_NMB,
                                                                                      TAB_STR: ZERO_NMB,
                                                                                      SPACE_STR: ZERO_NMB}];

        // Must live outside the character loop so the state survives from one character to the next.
        BOOL quoteOpened = NO;

        for (NSUInteger i = 0; i < length; i++) {
            unichar lastCharacter = i > 0 ? [line characterAtIndex:i - 1] : 0;
            unichar character = [line characterAtIndex:i];
            unichar nextCharacter = i + 1 < length ? [line characterAtIndex:i + 1] : 0;

            if (!quoteOpened && character == DOUBLE_QUOTE && delimiterOrNothing(lastCharacter)) { // opening quote
                quoteOpened = YES;
            } else if (quoteOpened && character == DOUBLE_QUOTE && delimiterOrNothing(nextCharacter)) { // closing quote
                quoteOpened = NO;
            } else if (!quoteOpened) { // possible delimiter outside of quotes
                switch (character) {
                    case COMMA:
                        INCREMENT_COUNT(COMMA_STR);
                        break;
                    case SEMICOLON:
                        INCREMENT_COUNT(SEMICOLON_STR);
                        break;
                    case COLON:
                        INCREMENT_COUNT(COLON_STR);
                        break;
                    case TAB:
                        INCREMENT_COUNT(TAB_STR);
                        break;
                    case SPACE:
                        INCREMENT_COUNT(SPACE_STR);
                        break;
                    default:
                        // skip other chars
                        break;
                }
            }
        }

        // detect possible counts:
        if (!possibleCounts) {
            possibleCounts = counts; // first non-empty line defines the expected counts
            continue;
        }
        for (NSString *delimiter in delimiterKeys) {
            NSNumber *expected = possibleCounts[delimiter];
            if (expected && ![expected isEqualToNumber:counts[delimiter]]) {
                [possibleCounts removeObjectForKey:delimiter]; // exclude this delimiter
            }
        }
    }

    // choose delimiter: first candidate, in priority order, that occurred at all
    for (NSString *delimiter in delimiterKeys) {
        NSNumber *count = possibleCounts[delimiter];
        if (count && [count unsignedIntegerValue] > 0) {
            if ([delimiter isEqualToString:COMMA_STR]) return COMMA;
            if ([delimiter isEqualToString:SEMICOLON_STR]) return SEMICOLON;
            if ([delimiter isEqualToString:COLON_STR]) return COLON;
            if ([delimiter isEqualToString:TAB_STR]) return TAB;
            if ([delimiter isEqualToString:SPACE_STR]) return SPACE;
        }
    }

    // return default if nothing detected
    return COMMA;
}
@luisgerhorst
Copy link
Author

Note: I'm not very experienced in Objective-C. The "count per line" and "detect possible counts" steps could be done in a single pass.

@luisgerhorst
Copy link
Author

There's an improved version at https://gist.github.com/luisgerhorst/8715763

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment