Created
January 30, 2014 18:41
-
-
Save luisgerhorst/8715763 to your computer and use it in GitHub Desktop.
Detect the delimiter of a CSV file in Objective-C.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Candidate CSV field delimiters that detectDelimiter() probes for.
static unichar const comma = ',';
static unichar const semicolon = ';';
static unichar const colon = ':';
static unichar const tab = '\t';
static unichar const space = ' ';
/**
 * Builds a regular expression that matches the individual fields of a single CSV line.
 *
 * A field is either a double-quoted string (which may contain the delimiter and
 * doubled double quotes) or a run of characters that are not the delimiter.
 * Pattern via http://stackoverflow.com/questions/3268622/regex-to-split-line-csv-file
 *
 * @param delimiter The character that separates fields. It is escaped before being
 *                  inserted into the pattern, so regex metacharacters such as '|'
 *                  or '.' are treated literally.
 * @return The compiled expression, or nil if the pattern could not be compiled.
 */
NSRegularExpression *fieldRegExForDelimiter(unichar delimiter) {
    // Escape the delimiter: inserted raw, a character like '|' would turn the
    // look-behind into an alternation with an empty branch and break field matching.
    NSString *rawDelimiter = [NSString stringWithCharacters:&delimiter length:1];
    NSString *escapedDelimiter = [NSRegularExpression escapedPatternForString:rawDelimiter];

    // (?<=^|D)  - a field starts at the beginning of the line or right after a delimiter.
    // "(...)*"  - quoted field; "" inside it is an escaped double quote.
    // [^D]*     - unquoted field; may be empty.
    NSString *fieldRegExPattern = [NSString stringWithFormat:@"(?<=^|%@)(\"(?:[^\"]|\"\")*\"|[^%@]*)",
                                   escapedDelimiter, escapedDelimiter];
    return [NSRegularExpression regularExpressionWithPattern:fieldRegExPattern options:0 error:nil];
}
/**
 * Guesses the field delimiter of a CSV string.
 *
 * Each candidate delimiter is tried in turn: the text is split into lines and the
 * number of fields per line is counted. A candidate only qualifies if every
 * non-empty line yields the same field count. Among qualifying candidates the one
 * with the highest field count wins; on a tie the candidate listed first wins.
 *
 * Empty lines are ignored, so a trailing newline or CRLF line endings do not
 * disqualify the real delimiter.
 *
 * @param csvString The CSV text to inspect. May be nil or empty.
 * @return The detected delimiter, or the first candidate (comma) if no line could
 *         be analysed or no candidate qualified.
 */
unichar detectDelimiter(NSString const *csvString) {
    // Sorted by importance: earlier entries win ties and the first one is the fallback.
    unichar const delimiters[] = {comma, semicolon, colon, tab, space};
    NSUInteger const delimitersCount = sizeof(delimiters) / sizeof(delimiters[0]);

    // Split on every newline character. "\r\n" produces an empty component between
    // the two characters, which is skipped below together with any other blank line.
    NSArray *lines = [csvString componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];

    unichar bestDelimiter = delimiters[0];
    NSUInteger bestFieldCount = 0;

    for (NSUInteger i = 0; i < delimitersCount; i++) {
        NSRegularExpression *fieldRegEx = fieldRegExForDelimiter(delimiters[i]);

        NSUInteger fieldCount = 0;
        BOOL fieldCountSet = NO;
        BOOL allLinesHaveSameFieldCount = YES;

        for (NSString *line in lines) {
            if ([line length] == 0) continue; // Blank lines carry no information about the delimiter.

            NSUInteger lineFieldCount = [fieldRegEx numberOfMatchesInString:line
                                                                    options:0
                                                                      range:NSMakeRange(0, [line length])];
            if (!fieldCountSet) {
                // The first non-empty line defines the expected field count.
                fieldCount = lineFieldCount;
                fieldCountSet = YES;
            } else if (lineFieldCount != fieldCount) {
                // Inconsistent field count: this candidate cannot be the delimiter.
                allLinesHaveSameFieldCount = NO;
                break;
            }
        }

        // Strictly greater, so that on equal counts the earlier (more important) delimiter is kept.
        if (fieldCountSet && allLinesHaveSameFieldCount && fieldCount > bestFieldCount) {
            bestFieldCount = fieldCount;
            bestDelimiter = delimiters[i];
        }
    }

    return bestDelimiter;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment