Created
August 15, 2012 19:28
-
-
Save trevordixon/3362830 to your computer and use it in GitHub Desktop.
Javascript CSV Parser generated by PEG.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Grammar for separator-delimited text (comma by default, tab via the
// |tab| rule). The parse result is an array of rows, each row an array of
// field strings.
//
// Initializer: |separator| is shared by the semantic predicates below.
{ | |
var separator = ','; | |
} | |
// Default entry point: comma-separated input.
start | |
= comma | |
// The predicate assigns the separator; the assigned string is truthy, so
// the predicate always succeeds and parsing continues with |sv|.
comma | |
= & { return separator = ','; } sv:sv { return sv; } | |
// Same as |comma| but with a tab separator. Not referenced by |start|.
tab | |
= & { return separator = '\t'; } sv:sv { return sv; } | |
// Rows separated by one or more newline characters; leading and trailing
// newline runs are skipped. Returns [first, ...rest].
sv | |
= [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; } | |
// One row: a field followed by any number of (separator, field) pairs.
// The second predicate rejects a row whose first field is '' and that has
// no further fields, so a blank line does not become a row.
// NOTE(review): a row consisting only of an empty quoted field ("") also
// yields first == '' with an empty |rest| and is therefore rejected too.
line | |
= first:field rest:(char:. & { return char == separator; } text:field { return text; })* | |
& { return !!first || rest.length; } | |
{ rest.unshift(first); return rest; } | |
// Either a double-quoted field, or a (possibly empty) run of characters
// that are neither newline characters nor the current separator.
field | |
= '"' text:char* '"' { return text.join(''); } | |
/ text:(char:[^\n\r] & { return char != separator; } { return char; })* | |
{ return text.join(''); } | |
// Inside quotes: a doubled quote stands for one literal quote; any other
// character except '"' is taken as-is (newlines included).
char | |
= '"' '"' { return '"'; } | |
/ [^"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
csvParser = (function(){ | |
/* | |
* Generated by PEG.js 0.7.0. | |
* | |
* http://pegjs.majda.cz/ | |
*/ | |
/*
 * Returns |s| wrapped in double quotes and escaped so that the result reads
 * as a JavaScript string literal. Used when building error messages.
 *
 * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
 * string literal except for the closing quote character, backslash,
 * carriage return, line separator, paragraph separator, and line feed.
 * Any character may appear in the form of an escape sequence.
 *
 * For portability, we also escape all control and non-ASCII characters.
 * Note that "\0" and "\v" escape sequences are not used because JSHint
 * does not like the first and IE the second.
 */
function quote(s) {
  /*
   * Formats one character as \xHH (code <= 0xFF) or \uHHHH.
   *
   * This helper is local on purpose: the parser's own |escape| function is
   * nested inside |parse| and is not visible here, so a bare |escape| at
   * this scope would resolve to the legacy global escape(), which emits
   * %HH / %uHHHH sequences instead of JavaScript escapes.
   */
  function hexEscape(ch) {
    var code = ch.charCodeAt(0);
    var isByte = code <= 0xFF;
    var width = isByte ? 2 : 4;
    var hex = code.toString(16).toUpperCase();
    while (hex.length < width) {
      hex = '0' + hex;
    }
    return '\\' + (isByte ? 'x' : 'u') + hex;
  }

  return '"' + s
    .replace(/\\/g, '\\\\')  // backslash (must run first)
    .replace(/"/g, '\\"')    // closing quote character
    .replace(/\x08/g, '\\b') // backspace
    .replace(/\t/g, '\\t')   // horizontal tab
    .replace(/\n/g, '\\n')   // line feed
    .replace(/\f/g, '\\f')   // form feed
    .replace(/\r/g, '\\r')   // carriage return
    .replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, hexEscape)
    + '"';
}
var result = { | |
/* | |
* Parses the input with a generated parser. If the parsing is successful, | |
* returns a value explicitly or implicitly specified by the grammar from | |
* which the parser was generated (see |PEG.buildParser|). If the parsing is | |
* unsuccessful, throws |PEG.parser.SyntaxError| describing the error. | |
*/ | |
parse: function(input, startRule) { | |
/* Maps every rule name accepted as |startRule| to its parse function. */
var parseFunctions = {
  "comma": parse_comma,
  "tab": parse_tab,
  "sv": parse_sv,
  "line": parse_line,
  "field": parse_field,
  "char": parse_char
};

/*
 * Validate the requested start rule, defaulting to "comma". An own-property
 * check is used so that names inherited from Object.prototype (for example
 * "constructor" or "toString") are rejected as invalid rule names instead
 * of being called as if they were parse functions.
 */
if (startRule !== undefined) {
  if (!Object.prototype.hasOwnProperty.call(parseFunctions, startRule)) {
    /* String() keeps quote() from failing on a non-string rule name. */
    throw new Error("Invalid rule name: " + quote(String(startRule)) + ".");
  }
} else {
  startRule = "comma";
}

/* Parser state shared by all parse_* functions below. */
var pos = 0;                        /* current offset into |input| */
var reportFailures = 0;             /* failures are recorded only while 0 */
var rightmostFailuresPos = 0;       /* furthest offset at which a match failed */
var rightmostFailuresExpected = []; /* what was expected at that offset */
/*
 * Prepends |padding| to |input| once for every character |input| is short
 * of |length|, and returns the padded string. Inputs already at least
 * |length| long are returned unchanged.
 */
function padLeft(input, padding, length) {
  var padded = input;
  var missing = length - input.length;
  while (missing > 0) {
    padded = padding + padded;
    missing--;
  }
  return padded;
}
/*
 * Formats the first character of |ch| as a JavaScript escape sequence:
 * \xHH when its code is at most 0xFF, \uHHHH otherwise. Hex digits are
 * upper-case and zero-padded via padLeft.
 */
function escape(ch) {
  var code = ch.charCodeAt(0);
  var fitsInByte = code <= 0xFF;
  var marker = fitsInByte ? 'x' : 'u';
  var digits = fitsInByte ? 2 : 4;
  var hex = code.toString(16).toUpperCase();
  return '\\' + marker + padLeft(hex, '0', digits);
}
/*
 * Records |failure| as an expectation at the current position, keeping only
 * the expectations that belong to the rightmost failure position seen so
 * far: failures left of it are ignored, a failure right of it starts a
 * fresh list.
 */
function matchFailed(failure) {
  var isBehind = pos < rightmostFailuresPos;
  if (!isBehind) {
    if (pos > rightmostFailuresPos) {
      /* New rightmost position: earlier expectations no longer apply. */
      rightmostFailuresPos = pos;
      rightmostFailuresExpected = [];
    }
    rightmostFailuresExpected.push(failure);
  }
}
/*
 * Rule |comma|: selects ',' as the field separator, then parses the input
 * with parse_sv. Returns the rows from parse_sv, or null (with |pos|
 * restored) when parse_sv fails.
 */
function parse_comma() {
  var startPos = pos;
  /* The grammar's predicate only assigns the separator; it cannot fail. */
  separator = ',';
  var rows = parse_sv();
  if (rows === null) {
    pos = startPos;
    return null;
  }
  return rows;
}
/*
 * Rule |tab|: selects '\t' as the field separator, then parses the input
 * with parse_sv. Returns the rows from parse_sv, or null (with |pos|
 * restored) when parse_sv fails.
 */
function parse_tab() {
  var startPos = pos;
  /* The grammar's predicate only assigns the separator; it cannot fail. */
  separator = '\t';
  var rows = parse_sv();
  if (rows === null) {
    pos = startPos;
    return null;
  }
  return rows;
}
/*
 * Rule |sv|: optional newline characters, one row, any number of further
 * rows each preceded by at least one newline character, then optional
 * trailing newline characters.
 *
 * Returns an array of rows (each row is whatever parse_line returned), or
 * null with |pos| restored when the first row cannot be parsed.
 */
function parse_sv() {
  var startPos = pos;

  /* Consumes a single \n or \r; records the failure and returns null otherwise. */
  function matchNewline() {
    var ch = input.charAt(pos);
    if (/^[\n\r]/.test(ch)) {
      pos++;
      return ch;
    }
    if (reportFailures === 0) {
      matchFailed("[\\n\\r]");
    }
    return null;
  }

  /* Consumes a run of newline characters and returns how many were taken. */
  function skipNewlines() {
    var taken = 0;
    while (matchNewline() !== null) {
      taken++;
    }
    return taken;
  }

  skipNewlines();

  var firstRow = parse_line();
  if (firstRow === null) {
    pos = startPos;
    return null;
  }

  var rows = [firstRow];
  for (;;) {
    var rowStart = pos;
    if (skipNewlines() === 0) {
      break;
    }
    var row = parse_line();
    if (row === null) {
      /* Give the newlines back; the trailing-newline step consumes them. */
      pos = rowStart;
      break;
    }
    rows.push(row);
  }

  skipNewlines();
  return rows;
}
/*
 * Rule |line|: one field followed by any number of (separator, field)
 * pairs. Returns the array of field strings, or null with |pos| restored
 * when the line is blank.
 *
 * A line counts as blank when it consumed no input at all. This deviates
 * from the grammar's original predicate (!!first || rest.length), which
 * also rejected a line consisting solely of an empty quoted field (""),
 * making such input fail to parse. Every line the original predicate
 * accepted consumes at least one character, so previously valid input is
 * parsed exactly as before.
 */
function parse_line() {
  var lineStart = pos;

  var first = parse_field();
  if (first === null) {
    pos = lineStart;
    return null;
  }

  var fields = [first];
  for (;;) {
    var pairStart = pos;

    /* Grammar: |.| -- any single character. */
    if (!(input.length > pos)) {
      if (reportFailures === 0) {
        matchFailed("any character");
      }
      break;
    }
    var ch = input.charAt(pos);
    pos++;

    /* The character only counts when it is the active separator. */
    if (ch != separator) {
      pos = pairStart;
      break;
    }

    var next = parse_field();
    if (next === null) {
      pos = pairStart;
      break;
    }
    fields.push(next);
  }

  /* Nothing consumed: this is a blank line, not a row. */
  if (pos === lineStart) {
    return null;
  }
  return fields;
}
/*
 * Rule |field|. Tries a double-quoted field first: '"', any number of
 * parse_char results, '"'. If that does not match, falls back to a bare
 * field: the longest run of characters that are neither \n, \r nor the
 * active separator (possibly empty, so this function always returns a
 * string). Returns the field text with the characters joined.
 */
function parse_field() {
  var fieldStart = pos;

  /* Alternative 1: quoted field. 34 is the char code of '"'. */
  if (input.charCodeAt(pos) === 34) {
    pos++;
    var quotedChars = [];
    var piece = parse_char();
    while (piece !== null) {
      quotedChars.push(piece);
      piece = parse_char();
    }
    if (input.charCodeAt(pos) === 34) {
      pos++;
      return quotedChars.join('');
    }
    /* No closing quote: report it where it was expected, then rewind. */
    if (reportFailures === 0) {
      matchFailed("\"\\\"\"");
    }
    pos = fieldStart;
  } else if (reportFailures === 0) {
    matchFailed("\"\\\"\"");
  }

  /* Alternative 2: bare field. */
  var bareChars = [];
  for (;;) {
    var ch = input.charAt(pos);
    if (!/^[^\n\r]/.test(ch)) {
      if (reportFailures === 0) {
        matchFailed("[^\\n\\r]");
      }
      break;
    }
    if (!(ch != separator)) {
      /* Separator ends the field and is left for the caller. */
      break;
    }
    bareChars.push(ch);
    pos++;
  }
  return bareChars.join('');
}
/*
 * Rule |char| (content of a quoted field). Two consecutive double quotes
 * are consumed and returned as a single '"'; otherwise any one character
 * other than '"' is consumed and returned. Returns null, leaving |pos|
 * untouched, when neither alternative matches.
 */
function parse_char() {
  var charStart = pos;

  /* Alternative 1: doubled quote. 34 is the char code of '"'. */
  if (input.charCodeAt(pos) === 34) {
    pos++;
    if (input.charCodeAt(pos) === 34) {
      pos++;
      return '"';
    }
    /* Second quote missing: report at that offset, then rewind. */
    if (reportFailures === 0) {
      matchFailed("\"\\\"\"");
    }
    pos = charStart;
  } else if (reportFailures === 0) {
    matchFailed("\"\\\"\"");
  }

  /* Alternative 2: any single character except '"'. */
  var ch = input.charAt(pos);
  if (/^[^"]/.test(ch)) {
    pos++;
    return ch;
  }
  if (reportFailures === 0) {
    matchFailed("[^\"]");
  }
  return null;
}
/*
 * Sorts |expected| in place (default sort order) and returns a new array
 * with consecutive duplicates removed.
 */
function cleanupExpected(expected) {
  expected.sort();
  var unique = [];
  var previous = null;
  var index = 0;
  while (index < expected.length) {
    var current = expected[index];
    if (current !== previous) {
      unique.push(current);
      previous = current;
    }
    index++;
  }
  return unique;
}
/*
 * Converts the error offset -- the larger of |pos| and
 * |rightmostFailuresPos| -- into a 1-based { line, column } pair by
 * scanning |input| from the start.
 *
 * "\r", "\u2028" and "\u2029" each start a new line; "\n" starts a new line
 * unless it directly follows one of those, so "\r\n" counts once. The scan
 * is done by hand rather than with String.split, which the original
 * generator comment describes as too broken in IE to rely on.
 */
function computeErrorPosition() {
  var limit = Math.max(pos, rightmostFailuresPos);
  var where = { line: 1, column: 1 };
  var afterBreak = false;

  for (var idx = 0; idx < limit; idx++) {
    switch (input.charAt(idx)) {
      case "\n":
        if (!afterBreak) {
          where.line++;
        }
        where.column = 1;
        afterBreak = false;
        break;
      case "\r":
      case "\u2028":
      case "\u2029":
        where.line++;
        where.column = 1;
        afterBreak = true;
        break;
      default:
        where.column++;
        afterBreak = false;
    }
  }
  return where;
}
var separator = ','; | |
var result = parseFunctions[startRule](); | |
/* | |
* The parser is now in one of the following three states: | |
* | |
* 1. The parser successfully parsed the whole input. | |
* | |
* - |result !== null| | |
* - |pos === input.length| | |
* - |rightmostFailuresExpected| may or may not contain something | |
* | |
* 2. The parser successfully parsed only a part of the input. | |
* | |
* - |result !== null| | |
* - |pos < input.length| | |
* - |rightmostFailuresExpected| may or may not contain something | |
* | |
* 3. The parser did not successfully parse any part of the input. | |
* | |
* - |result === null| | |
* - |pos === 0| | |
* - |rightmostFailuresExpected| contains at least one failure | |
* | |
* All code following this comment (including called functions) must | |
* handle these states. | |
*/ | |
if (result === null || pos !== input.length) { | |
var offset = Math.max(pos, rightmostFailuresPos); | |
var found = offset < input.length ? input.charAt(offset) : null; | |
var errorPosition = computeErrorPosition(); | |
throw new this.SyntaxError( | |
cleanupExpected(rightmostFailuresExpected), | |
found, | |
offset, | |
errorPosition.line, | |
errorPosition.column | |
); | |
} | |
return result; | |
}, | |
/*
 * Returns the value of this object's |_source| property.
 * NOTE(review): nothing in the visible code assigns |_source|, so this
 * presumably yields undefined unless a caller sets it -- confirm.
 */ | |
toSource: function() { return this._source; } | |
}; | |
/*
 * Error constructor used when the input does not match the grammar.
 *
 * expected - array of expectation descriptions at the failure position
 * found    - the offending character, or null at end of input
 * offset   - 0-based offset of the failure in the input
 * line     - 1-based line of the failure
 * column   - 1-based column of the failure
 *
 * All five values are stored on the instance, together with |name| and a
 * human-readable |message|.
 */
result.SyntaxError = function(expected, found, offset, line, column) {
  /* Builds "Expected A, B or C but X found." from the raw values. */
  function describe(expectedItems, foundChar) {
    var count = expectedItems.length;
    var expectedText;
    if (count === 0) {
      expectedText = "end of input";
    } else if (count === 1) {
      expectedText = expectedItems[0];
    } else {
      var allButLast = expectedItems.slice(0, expectedItems.length - 1);
      expectedText = allButLast.join(", ")
        + " or "
        + expectedItems[expectedItems.length - 1];
    }

    var foundText = foundChar ? quote(foundChar) : "end of input";

    return "Expected " + expectedText + " but " + foundText + " found.";
  }

  this.name = "SyntaxError";
  this.expected = expected;
  this.found = found;
  this.message = describe(expected, found);
  this.offset = offset;
  this.line = line;
  this.column = column;
};
result.SyntaxError.prototype = Error.prototype; | |
return result; | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
""
fails, which seems strange?