Skip to content

Instantly share code, notes, and snippets.

@westc
Last active August 29, 2015 14:11
Show Gist options
  • Select an option

  • Save westc/2f6eb2ad7ba3a592b91a to your computer and use it in GitHub Desktop.

Select an option

Save westc/2f6eb2ad7ba3a592b91a to your computer and use it in GitHub Desktop.
Test a CSV generated by Excel that contains apostrophes to prove that they don't break the parser.
// CSV contents generated in Excel.
var csv = "First Name,Last Name\nPat,O'Rielly\nSam,O'Brien\nJohn,Carter";
/**
* Takes a string representation of a CSV and parses it into an array of arrays
* or an array of objects (dictionaries).
* @param {string} strCSV
* String representation of a CSV.
* @param {boolean=} opt_headerRow
* Optional boolean defaulting to false which if set to true indicates that
* the first row is to be collected as the header row and all rows returned
* will be objects keyed by those header names instead of arrays.
* @param {string=} opt_delimiter
* Optional string defaulting to "," which indicates the cell delimiter.
* @param {?function(string=,number,number,string=):*} opt_fnProcessCell
* Optional function used to pre-process the cell values. The values passed
* are the cell value found, the row index, the cell index, and the cell
* name (if opt_headerRow is true). The value returned will be stored in
* the array of rows returned.
* @return {Array.<Array,Object>}
* In the case that opt_headerRow is truish, an array of arrays will be
* returned with each sub-array representing a CSV row. In the case that
* opt_headerRow is not truish an array of objects keyed by the column names
* will be returned with each sub-object representing a CSV row.
*/
function parseCSV(strCSV, opt_headerRow, opt_delimiter, opt_fnProcessCell) {
opt_delimiter = opt_delimiter || ',';
var pattern = '([^"' + opt_delimiter + '\r\n]*|"((?:[^"]+|"")*)")(,|\r|\r?\n)';
var colNames = [], isHeaderRow = opt_headerRow, rowCount = 0;
var row = [], rows = opt_headerRow ? [] : [row], colIndex = 0;
(strCSV + opt_delimiter).replace(new RegExp(pattern, 'g'), function(match, cell, quoted, delimiter) {
cell = quoted ? quoted.replace(/""/g, '"') : cell;
if (isHeaderRow) {
colNames.push(cell);
}
else {
row[opt_headerRow ? colNames[colIndex] : colIndex] = opt_fnProcessCell
? opt_fnProcessCell(cell, rowCount, colIndex, colNames[colIndex])
: cell;
colIndex++;
}
if (delimiter != opt_delimiter) {
rowCount++;
rows.push(row = opt_headerRow ? {} : []);
colIndex = isHeaderRow = 0;
}
});
return rows;
}
console.log('CSV String:', csv);
console.log('CSV As Array of Arrays:', parseCSV(csv));
console.log('CSV As Array of Objects:', parseCSV(csv, true, undefined));
console.log('CSV As Array of Objects:', parseCSV(csv, true, undefined, function(value, rowIndex, colIndex, colName) {
return colName == 'Purchase Amount'
? 1 * value
: colName == 'Return Customer'
? value == 'Y'
: colName == 'Last Purchase Date'
? new Date(value)
: value;
}));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment