Created
December 16, 2016 11:59
-
-
Save ben8p/e3b958ce082c87c67f9c9bb785645f67 to your computer and use it in GitHub Desktop.
CSV parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * CSV Parser. Takes a string as input and stores the parsed result,
 * an array of arrays (one per row), in the `data` property.
 *
 * @param data String, CSV input
 * @param options.delimiter String, single character used to separate fields.
 *        Defaults to null; if null, the parser tries to guess it.
 * @param options.quote String, single character used to quote non-simple fields.
 *        Defaults to "\"".
 */
class CSVParser {
  /**
   * Parses `data` immediately. After construction:
   *  - `this.data` holds the rows (array of arrays of strings),
   *  - `this.delimiter` holds the delimiter that was used (given or guessed),
   *  - `this.quote` holds the quote character that was used.
   *
   * Quote handling is deliberately lenient: a quote ends a quoted field only
   * when it is followed by the delimiter, a line break or the end of input
   * (which of those is accepted depends on the column position, see
   * `_isLastColumn`). Any other quote inside a quoted field is kept as
   * content, and a doubled quote (`""`) yields a single quote.
   *
   * @param data String, CSV input. Empty/null/undefined yields `[['']]`.
   * @param options.delimiter String, single character used to separate fields.
   *        Defaults to null; when falsy the delimiter is guessed from `data`.
   * @param options.quote String, single character used to quote fields.
   *        Defaults to "\"" when omitted; pass null or '' to disable quoting.
   */
  constructor(data, options = {delimiter: null, quote: '"'}) {
    const settings = options || {};
    // Default each option on its own, so passing only `delimiter` does not
    // silently leave the quote character undefined.
    this.quote = settings.quote === undefined ? '"' : settings.quote;
    // `data || ''` keeps the guess from throwing on null/undefined input.
    this.delimiter = settings.delimiter || this._guessDelimiter(data || '');
    if(!data) {
      this.data = [['']];
      return;
    }
    this.data = this._parse(data.split(''));
  }

  /**
   * Appends one character to the field buffer.
   *
   * @param stream Array of characters of the field being built (mutated).
   * @param c String, the character to append.
   */
  _addToStream(stream, c) {
    stream.push(c);
  }

  /**
   * Finishes the current field: appends it to `row` and empties the buffer.
   *
   * @param row Array of fields of the current row (mutated).
   * @param stream Array of characters of the finished field (emptied).
   */
  _addStream(row, stream) {
    row.push(stream.join(''));
    stream.length = 0;
  }

  /**
   * Finishes the current row: flushes the pending field and appends the row
   * to `rows`.
   *
   * @return A fresh empty array to be used as the next row.
   */
  _addRow(rows, row, stream) {
    this._addStream(row, stream);
    rows.push(row);
    return [];
  }

  /**
   * Tells which terminators may follow the closing quote of the field that
   * is currently being parsed, based on the width of the first row.
   *
   * @param rows Array of rows completed so far.
   * @param row Array of fields already completed in the current row.
   * @return true if the current field is the last column of the first row,
   *         false if it comes before it, undefined when unknown (no complete
   *         row yet, or the current row is wider than the first one).
   */
  _isLastColumn(rows, row) {
    if(!rows[0]) { return undefined; }
    // `row.length` is the zero-based index of the field being parsed.
    const lastIndex = rows[0].length - 1;
    if(row.length < lastIndex) { return false; }
    if(row.length === lastIndex) { return true; }
    return undefined;
  }

  /**
   * Decides whether a quote that was just consumed closes the quoted field,
   * by looking at the characters that follow it in `data`.
   *
   * @param data Array of remaining characters (not modified).
   * @param lastColumn Boolean or undefined, see `_isLastColumn`.
   */
  _isClosingQuote(data, lastColumn) {
    const next = data[0];
    if(next === undefined) { return true; }
    const atDelimiter = next === this.delimiter;
    // A line break is either "\n" or "\r\n"; a lone "\r" is ordinary content.
    const atLineBreak = next === '\n' || (next === '\r' && data[1] === '\n');
    if(lastColumn === false) { return atDelimiter; }
    if(lastColumn === true) { return atLineBreak; }
    return atDelimiter || atLineBreak;
  }

  /**
   * Consumes the content of a quoted field (the opening quote has already
   * been consumed) and appends it to `stream`. Stops after the closing quote,
   * or at the end of input. The terminator that follows the closing quote is
   * left in `data` for the caller.
   *
   * @param data Array of remaining characters (consumed from the front).
   * @param stream Array of characters of the field being built (mutated).
   * @param lastColumn Boolean or undefined, see `_isLastColumn`.
   */
  _extractContent(data, stream, lastColumn) {
    let c;
    // True right after a literal quote was kept: the next quote, if any, is
    // the second half of an escaped pair ("") and must be dropped.
    let quotePending = false;
    while((c = data.shift()) !== undefined) {
      // "\r\n" inside a quoted field is normalised to "\n".
      if(c === '\r' && data[0] === '\n') { continue; }
      if(c === this.quote) {
        if(this._isClosingQuote(data, lastColumn)) { return; }
        if(quotePending) {
          quotePending = false;
          continue;
        }
        quotePending = true;
        this._addToStream(stream, c);
        continue;
      }
      quotePending = false;
      this._addToStream(stream, c);
    }
  }

  /**
   * Turns the characters of the input into rows of fields.
   *
   * @param data Array of single characters (consumed from the front).
   * @return Array of rows, each row an array of strings. A trailing line
   *         break produces a final row holding one empty field.
   */
  _parse(data) {
    const rows = [];
    const stream = [];
    let row = [];
    let c;
    while((c = data.shift()) !== undefined) {
      // Drop the "\r" of "\r\n"; the "\n" that follows ends the row.
      if(c === '\r' && data[0] === '\n') { continue; }
      if(c === this.delimiter) {
        this._addStream(row, stream);
      } else if(c === '\n') {
        row = this._addRow(rows, row, stream);
      } else if(c === this.quote) {
        this._extractContent(data, stream, this._isLastColumn(rows, row));
      } else {
        this._addToStream(stream, c);
      }
    }
    // Flush whatever is pending as the last row.
    this._addRow(rows, row, stream);
    return rows;
  }

  /**
   * Guesses the delimiter by counting, over the first lines of the input,
   * how many pieces each candidate splits the lines into. The candidate with
   * the highest total wins; on a tie the candidate listed last wins.
   *
   * @param data String, CSV input.
   * @return String, the guessed delimiter.
   */
  _guessDelimiter(data) {
    const possibleDelimiters = [',', ';', '^', '$', '.', '|', '\t'];
    const lines = (data || '').split(/\n|\r/);
    const totals = possibleDelimiters.map(() => 0);
    for(let index = 0; index < lines.length; index++) {
      const line = lines[index];
      possibleDelimiters.forEach((possibleDelimiter, position) => {
        totals[position] += line.split(possibleDelimiter).length;
      });
      //only analyse 10% of the lines. It should be enough
      if(index >= lines.length * 0.1) { break; }
    }
    let best = 0;
    totals.forEach((total, position) => {
      // ">=" so that, on a tie, the later candidate is preferred.
      if(total >= totals[best]) { best = position; }
    });
    return possibleDelimiters[best];
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment