Created
September 21, 2022 13:19
-
-
Save brianmfear/67a18e6c3ad384c7a6da5ec2cc04aca9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Small CSV parser for comma-separated text with optional double-quoted cells.
 *
 * The parser consumes its input destructively: every cell that is read is removed
 * from the front of the internal buffer, so an instance can be parsed only once.
 * All line endings ("\r\n" and "\n") are normalised to "\r" by the constructor,
 * which also applies to line breaks that appear inside quoted cells.
 *
 * Malformed input is reported with an InvalidStateException whose message ends in
 * "@ row R cell C".
 */
public class CsvParser {
    // Unparsed remainder of the input; shrinks from the front as cells are read.
    String csv;
    // Number of non-blank rows completed so far (so it is the index of the row being read).
    Integer rowCount;
    // One-based index of the cell being read; 0 is used for errors that concern a whole row.
    Integer cellCount;
    final static String END_LINE = '\r';
    final static String END_CELL = ',';
    final static String ESCAPE_CHAR = '"';
    final static String SPACE = ' ';

    /**
     * Creates a parser over the given CSV text.
     *
     * @param csvString the raw CSV text; null is treated as empty input
     */
    public CsvParser(String csvString) {
        csv = csvString == null
            ? ''
            : csvString.replace('\r\n', END_LINE).replace('\n', END_LINE);
    }

    /**
     * Parses every non-blank line as a data row.
     *
     * The first non-blank row fixes the expected number of cells; any later row
     * with a different number of cells raises an InvalidStateException.
     *
     * @return the rows in input order, each row being its list of cell values
     */
    public List<List<String>> parseWithoutHeaders() {
        cellCount = 1;
        rowCount = 0;
        return readRows(null);
    }

    /**
     * Reads the first line as a header row, checks that its names are unique, and
     * then parses the remaining non-blank lines as data rows that must have the
     * same number of cells as the header.
     *
     * @return the data rows only (the header row is validated but not returned)
     */
    public List<List<String>> parseWithHeaders() {
        // Initialise the counters before reading so that header errors report a real position.
        cellCount = 1;
        rowCount = 0;
        List<String> headers = readLine();
        Set<String> uniqueHeaders = new Set<String>();
        cellCount = 0;
        for(String header: headers) {
            cellCount++;
            // Set.add returns false when the value is already present.
            if(!uniqueHeaders.add(header)) {
                throwInvalidStateError('Duplicate header detected');
            }
        }
        rowCount++;
        return readRows(headers.size());
    }

    /**
     * Reads rows until the buffer is exhausted, skipping blank lines.
     *
     * @param expectedSize required number of cells per row, or null to take it
     *                     from the first non-blank row
     */
    List<List<String>> readRows(Integer expectedSize) {
        List<List<String>> rows = new List<List<String>>();
        Integer rowSize = expectedSize;
        while(csv != '') {
            List<String> line = readLine();
            Integer lineSize = line.size();
            if(lineSize == 0) {
                // Blank line: nothing to record and it does not count as a row.
                continue;
            }
            if(rowSize == null) {
                rowSize = lineSize;
            }
            if(rowSize != lineSize) {
                // The problem is the row as a whole, not a particular cell.
                cellCount = 0;
                throwInvalidStateError('CSV row has '+lineSize+' cell(s), expected '+rowSize+' cell(s)');
            }
            rows.add(line);
            rowCount++;
        }
        return rows;
    }

    /**
     * Reads cells up to the next line break (or the end of input) and removes the
     * line, including its terminator, from the buffer.
     *
     * @return the cells of the line; empty for a blank line or exhausted input
     */
    List<String> readLine() {
        List<String> results = new List<String>();
        cellCount = 1;
        while(csv != '' && !csv.startsWith(END_LINE)) {
            results.add(readCell());
            if(csv == '') {
                break;
            }
            // After a cell only a delimiter or a line break is legal.
            if(csv.indexOfAny(END_LINE+END_CELL) != 0) {
                throwInvalidStateError('Unexpected CSV state while reading line');
            }
            if(csv.startsWith(END_CELL)) {
                csv = csv.substring(1);
                cellCount++;
                if(csv == '' || csv.startsWith(END_LINE)) {
                    // A delimiter directly before the end of the line denotes a final empty cell.
                    results.add('');
                }
            }
        }
        // Drop the line terminator; there is none when the input ended without one.
        if(csv != '') {
            csv = csv.substring(1);
        }
        return results;
    }

    /**
     * Strips leading spaces and reads one cell, quoted or unquoted depending on
     * whether the next character is a double quote.
     */
    String readCell() {
        csv = csv.replaceFirst('^ *','');
        return csv.startsWith(ESCAPE_CHAR) ? readQuotedCell() : readUnquotedCell();
    }

    /**
     * Reads a cell that starts with a double quote. Inside the cell a doubled
     * quote ("") stands for one literal quote; the first quote that is not doubled
     * closes the cell. Delimiters and line breaks inside the quotes are kept as
     * cell content.
     *
     * @return the cell content without the surrounding quotes
     */
    String readQuotedCell() {
        String result = '';
        // Index 0 is the opening quote, so scanning starts right after it.
        Integer position = 1;
        Boolean closed = false;
        while(!closed) {
            Integer nextEscape = csv.indexOf(ESCAPE_CHAR, position);
            if(nextEscape == -1) {
                throwInvalidStateError('Quoted cell does not end in a quote');
            }
            result += csv.substring(position, nextEscape);
            Integer following = nextEscape + 1;
            if(following < csv.length() && csv.substring(following, following + 1) == ESCAPE_CHAR) {
                // Doubled quote: emit one literal quote and keep scanning.
                result += ESCAPE_CHAR;
                position = following + 1;
            } else {
                // Closing quote: leave whatever follows it for readLine to validate.
                csv = csv.substring(following);
                closed = true;
            }
        }
        return result;
    }

    /**
     * Reads a cell that does not start with a quote: everything up to the next
     * delimiter, line break, or end of input. A double quote anywhere in such a
     * cell is rejected.
     */
    String readUnquotedCell() {
        Integer nextCellBreak = csv.indexOfAny(END_CELL+END_LINE);
        // Without a further delimiter the cell runs to the end of the input.
        Integer cellEnd = nextCellBreak == -1 ? csv.length() : nextCellBreak;
        String result = csv.substring(0, cellEnd);
        if(result.contains(ESCAPE_CHAR)) {
            throwInvalidStateError('You cannot include a quoted character in an unquoted cell');
        }
        csv = csv.substring(cellEnd);
        return result;
    }

    /**
     * Throws an InvalidStateException carrying the message plus the current row
     * and cell counters.
     */
    void throwInvalidStateError(String message) {
        throw new InvalidStateException(String.format('{0} @ row {1} cell {2}', new String[] { message, ''+rowCount, ''+cellCount}));
    }

    public class InvalidStateException extends Exception { }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment